From e8dee096127ebfd9567208163eedd0d468c8d7dc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 12:50:12 +0200 Subject: [PATCH 01/21] JIT changes --- src/coreclr/jit/CMakeLists.txt | 2 + src/coreclr/jit/ICorJitInfo_names_generated.h | 2 + .../jit/ICorJitInfo_wrapper_generated.hpp | 16 + src/coreclr/jit/async.cpp | 1956 +++++++++++++++++ src/coreclr/jit/async.h | 152 ++ src/coreclr/jit/block.cpp | 1 + src/coreclr/jit/block.h | 15 +- src/coreclr/jit/codegen.h | 3 + src/coreclr/jit/codegenarm.cpp | 9 + src/coreclr/jit/codegenarmarch.cpp | 12 +- src/coreclr/jit/codegencommon.cpp | 103 +- src/coreclr/jit/codegenlinear.cpp | 7 +- src/coreclr/jit/codegenloongarch64.cpp | 3 +- src/coreclr/jit/codegenriscv64.cpp | 3 +- src/coreclr/jit/codegenxarch.cpp | 24 +- src/coreclr/jit/compiler.cpp | 15 +- src/coreclr/jit/compiler.h | 50 +- src/coreclr/jit/compiler.hpp | 16 +- src/coreclr/jit/compmemkind.h | 1 + src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/emit.cpp | 14 +- src/coreclr/jit/emit.h | 13 + src/coreclr/jit/emitarm.cpp | 8 +- src/coreclr/jit/emitarm.h | 13 +- src/coreclr/jit/emitarm64.cpp | 9 +- src/coreclr/jit/emitarm64.h | 6 +- src/coreclr/jit/emitloongarch64.cpp | 9 +- src/coreclr/jit/emitloongarch64.h | 6 +- src/coreclr/jit/emitriscv64.cpp | 9 +- src/coreclr/jit/emitriscv64.h | 6 +- src/coreclr/jit/emitxarch.cpp | 14 +- src/coreclr/jit/emitxarch.h | 6 +- src/coreclr/jit/fgbasic.cpp | 4 +- src/coreclr/jit/fgdiagnostic.cpp | 18 +- src/coreclr/jit/fginline.cpp | 19 +- src/coreclr/jit/fgstmt.cpp | 1 + src/coreclr/jit/flowgraph.cpp | 13 +- src/coreclr/jit/forwardsub.cpp | 2 +- src/coreclr/jit/gentree.cpp | 64 +- src/coreclr/jit/gentree.h | 7 + src/coreclr/jit/gtlist.h | 6 + src/coreclr/jit/importer.cpp | 126 +- src/coreclr/jit/importercalls.cpp | 122 +- src/coreclr/jit/inline.def | 1 + src/coreclr/jit/jitconfigvalues.h | 2 + src/coreclr/jit/jitee.h | 2 +- src/coreclr/jit/layout.cpp | 25 + src/coreclr/jit/layout.h | 2 + 
src/coreclr/jit/lclvars.cpp | 44 +- src/coreclr/jit/lir.h | 14 + src/coreclr/jit/liveness.cpp | 1 + src/coreclr/jit/lower.cpp | 73 + src/coreclr/jit/lower.h | 2 + src/coreclr/jit/lsra.h | 3 +- src/coreclr/jit/lsraarm.cpp | 7 + src/coreclr/jit/lsraarm64.cpp | 5 + src/coreclr/jit/lsraarmarch.cpp | 5 + src/coreclr/jit/lsrabuild.cpp | 25 +- src/coreclr/jit/lsraloongarch64.cpp | 10 + src/coreclr/jit/lsrariscv64.cpp | 10 + src/coreclr/jit/lsraxarch.cpp | 10 + src/coreclr/jit/morph.cpp | 9 + src/coreclr/jit/namedintrinsiclist.h | 6 + src/coreclr/jit/optcse.cpp | 119 +- src/coreclr/jit/patchpoint.cpp | 5 +- src/coreclr/jit/targetamd64.h | 3 + src/coreclr/jit/targetarm.h | 3 + src/coreclr/jit/targetarm64.h | 3 + src/coreclr/jit/targetloongarch64.h | 3 + src/coreclr/jit/targetriscv64.h | 3 + src/coreclr/jit/targetx86.h | 3 + src/coreclr/jit/valuenum.cpp | 9 +- 72 files changed, 3175 insertions(+), 118 deletions(-) create mode 100644 src/coreclr/jit/async.cpp create mode 100644 src/coreclr/jit/async.h diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 288edf637a6dd4..c0939484ff5701 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -88,6 +88,7 @@ endif(CLR_CMAKE_TARGET_WIN32) set( JIT_SOURCES abi.cpp alloc.cpp + async.cpp assertionprop.cpp bitset.cpp block.cpp @@ -285,6 +286,7 @@ set( JIT_HEADERS _typeinfo.h abi.h alloc.h + async.h arraystack.h bitset.h layout.h diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h index 94e244c0749bfa..ca1fed8c1c6592 100644 --- a/src/coreclr/jit/ICorJitInfo_names_generated.h +++ b/src/coreclr/jit/ICorJitInfo_names_generated.h @@ -124,6 +124,7 @@ DEF_CLR_API(getHFAType) DEF_CLR_API(runWithErrorTrap) DEF_CLR_API(runWithSPMIErrorTrap) DEF_CLR_API(getEEInfo) +DEF_CLR_API(getAsyncInfo) DEF_CLR_API(getMethodDefFromMethod) DEF_CLR_API(printMethodName) DEF_CLR_API(getMethodNameFromMetadata) @@ -161,6 +162,7 @@ 
DEF_CLR_API(getFieldThreadLocalStoreID) DEF_CLR_API(GetDelegateCtor) DEF_CLR_API(MethodCompileComplete) DEF_CLR_API(getTailCallHelpers) +DEF_CLR_API(getAsyncResumptionStub) DEF_CLR_API(convertPInvokeCalliToCall) DEF_CLR_API(notifyInstructionSetUsage) DEF_CLR_API(updateEntryPointForTailCall) diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp index 9c7e6c1099826d..131698d8d495f2 100644 --- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp +++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp @@ -1181,6 +1181,14 @@ void WrapICorJitInfo::getEEInfo( API_LEAVE(getEEInfo); } +void WrapICorJitInfo::getAsyncInfo( + CORINFO_ASYNC_INFO* pAsyncInfoOut) +{ + API_ENTER(getAsyncInfo); + wrapHnd->getAsyncInfo(pAsyncInfoOut); + API_LEAVE(getAsyncInfo); +} + mdMethodDef WrapICorJitInfo::getMethodDefFromMethod( CORINFO_METHOD_HANDLE hMethod) { @@ -1555,6 +1563,14 @@ bool WrapICorJitInfo::getTailCallHelpers( return temp; } +CORINFO_METHOD_HANDLE WrapICorJitInfo::getAsyncResumptionStub() +{ + API_ENTER(getAsyncResumptionStub); + CORINFO_METHOD_HANDLE temp = wrapHnd->getAsyncResumptionStub(); + API_LEAVE(getAsyncResumptionStub); + return temp; +} + bool WrapICorJitInfo::convertPInvokeCalliToCall( CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp new file mode 100644 index 00000000000000..114386b675be3b --- /dev/null +++ b/src/coreclr/jit/async.cpp @@ -0,0 +1,1956 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "jitpch.h" +#include "jitstd/algorithm.h" +#include "async.h" + +class AsyncLiveness +{ + Compiler* m_comp; + bool m_hasLiveness; + TreeLifeUpdater m_updater; + unsigned m_numVars; + +public: + AsyncLiveness(Compiler* comp, bool hasLiveness) + : m_comp(comp) + , m_hasLiveness(hasLiveness) + , m_updater(comp) + , m_numVars(comp->lvaCount) + { + } + + void StartBlock(BasicBlock* block); + void Update(GenTree* node); + bool IsLive(unsigned lclNum); + void GetLiveLocals(jitstd::vector& liveLocals, unsigned fullyDefinedRetBufLcl); + +private: + bool IsLocalCaptureUnnecessary(unsigned lclNum); +}; + +//------------------------------------------------------------------------ +// AsyncLiveness::StartBlock: +// Indicate that we are now starting a new block, and do relevant liveness +// updates for it. +// +// Parameters: +// block - The block that we are starting. +// +void AsyncLiveness::StartBlock(BasicBlock* block) +{ + if (!m_hasLiveness) + return; + + VarSetOps::Assign(m_comp, m_comp->compCurLife, block->bbLiveIn); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::Update: +// Update liveness to be consistent with the specified node having been +// executed. +// +// Parameters: +// node - The node. +// +void AsyncLiveness::Update(GenTree* node) +{ + if (!m_hasLiveness) + return; + + m_updater.UpdateLife(node); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::IsLocalCaptureUnnecessary: +// Check if capturing a specified local can be skipped. +// +// Parameters: +// lclNum - The local +// +// Returns: +// True if the local should not be captured. 
Even without liveness +// +bool AsyncLiveness::IsLocalCaptureUnnecessary(unsigned lclNum) +{ +#if FEATURE_FIXED_OUT_ARGS + if (lclNum == m_comp->lvaOutgoingArgSpaceVar) + { + return true; + } +#endif + + if (lclNum == m_comp->info.compRetBuffArg) + { + return true; + } + + if (lclNum == m_comp->lvaGSSecurityCookie) + { + // Initialized in prolog + return true; + } + + if (lclNum == m_comp->lvaPSPSym) + { + // Initialized in prolog + return true; + } + + if (lclNum == m_comp->info.compLvFrameListRoot) + { + return true; + } + + if (lclNum == m_comp->lvaInlinedPInvokeFrameVar) + { + return true; + } + +#ifdef FEATURE_EH_WINDOWS_X86 + if (lclNum == m_comp->lvaShadowSPslotsVar) + { + // Only expected to be live in handlers + return true; + } +#endif + + if (lclNum == m_comp->lvaRetAddrVar) + { + return true; + } + + if (lclNum == m_comp->lvaAsyncContinuationArg) + { + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// AsyncLiveness::IsLive: +// Check if the specified local is live at this point and should be captured. +// +// Parameters: +// lclNum - The local +// +// Returns: +// True if the local is live and capturing it is necessary. +// +bool AsyncLiveness::IsLive(unsigned lclNum) +{ + if (IsLocalCaptureUnnecessary(lclNum)) + { + return false; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(lclNum); + + if (((dsc->TypeGet() == TYP_BYREF) && !dsc->IsImplicitByRef()) || + ((dsc->TypeGet() == TYP_STRUCT) && dsc->GetLayout()->HasGCByRef())) + { + // Even if these are address exposed we expect them to be dead at + // suspension points. TODO: It would be good to somehow verify these + // aren't obviously live, if the JIT creates live ranges that span a + // suspension point then this makes it quite hard to diagnose that. 
+ return false; + } + + if (!m_hasLiveness) + { + return true; + } + + if (dsc->lvRefCnt(RCS_NORMAL) == 0) + { + return false; + } + + Compiler::lvaPromotionType promoType = m_comp->lvaGetPromotionType(dsc); + if (promoType == Compiler::PROMOTION_TYPE_INDEPENDENT) + { + // Independently promoted structs are handled only through their + // fields. + return false; + } + + if (promoType == Compiler::PROMOTION_TYPE_DEPENDENT) + { + // Dependently promoted structs are handled only through the base + // struct local. + // + // A dependently promoted struct is live if any of its fields are live. + + for (unsigned i = 0; i < dsc->lvFieldCnt; i++) + { + LclVarDsc* fieldDsc = m_comp->lvaGetDesc(dsc->lvFieldLclStart + i); + if (!fieldDsc->lvTracked || VarSetOps::IsMember(m_comp, m_comp->compCurLife, fieldDsc->lvVarIndex)) + { + return true; + } + } + + return false; + } + + if (dsc->lvIsStructField && (m_comp->lvaGetParentPromotionType(dsc) == Compiler::PROMOTION_TYPE_DEPENDENT)) + { + return false; + } + + return !dsc->lvTracked || VarSetOps::IsMember(m_comp, m_comp->compCurLife, dsc->lvVarIndex); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::GetLiveLocals: +// Get live locals that should be captured at this point. +// +// Parameters: +// liveLocals - Vector to add live local information into +// fullyDefinedRetBufLcl - Local to skip even if live +// +void AsyncLiveness::GetLiveLocals(jitstd::vector& liveLocals, unsigned fullyDefinedRetBufLcl) +{ + for (unsigned lclNum = 0; lclNum < m_numVars; lclNum++) + { + if ((lclNum != fullyDefinedRetBufLcl) && IsLive(lclNum)) + { + liveLocals.push_back(LiveLocalInfo(lclNum)); + } + } +} + +//------------------------------------------------------------------------ +// TransformAsync: Run async transformation. +// +// Returns: +// Suitable phase status. 
+// +PhaseStatus Compiler::TransformAsync() +{ + assert(compIsAsync()); + + AsyncTransformation transformation(this); + return transformation.Run(); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::Run: +// Run the transformation over all the IR. +// +// Returns: +// Suitable phase status. +// +PhaseStatus AsyncTransformation::Run() +{ + ArrayStack worklist(m_comp->getAllocator(CMK_Async)); + + // First find all basic blocks with awaits in them. We'll have to track + // liveness in these basic blocks, so it does not help to record the calls + // ahead of time. + for (BasicBlock* block : m_comp->Blocks()) + { + for (GenTree* tree : LIR::AsRange(block)) + { + if (tree->IsCall() && tree->AsCall()->IsAsync() && !tree->AsCall()->IsTailCall()) + { + JITDUMP(FMT_BB " contains await(s)\n", block->bbNum); + worklist.Push(block); + break; + } + } + } + + JITDUMP("Found %d blocks with awaits\n", worklist.Height()); + + if (worklist.Height() <= 0) + { + return PhaseStatus::MODIFIED_NOTHING; + } + + // Ask the VM to create a resumption stub for this specific version of the + // code. It is stored in the continuation as a function pointer, so we need + // the fixed entry point here. + m_resumeStub = m_comp->info.compCompHnd->getAsyncResumptionStub(); + m_comp->info.compCompHnd->getFunctionFixedEntryPoint(m_resumeStub, false, &m_resumeStubLookup); + + m_returnedContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("returned continuation")); + m_comp->lvaGetDesc(m_returnedContinuationVar)->lvType = TYP_REF; + m_newContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("new continuation")); + m_comp->lvaGetDesc(m_newContinuationVar)->lvType = TYP_REF; + + m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); + +#ifdef JIT32_GCENCODER + // Due to a hard cap on epilogs we need a shared return here. 
+ m_sharedReturnBB = m_comp->fgNewBBafter(BBJ_RETURN, m_comp->fgLastBBInMainFunction(), false); + m_sharedReturnBB->bbSetRunRarely(); + m_sharedReturnBB->clearTryIndex(); + m_sharedReturnBB->clearHndIndex(); + + if (m_comp->fgIsUsingProfileWeights()) + { + // All suspension BBs are cold, so we do not need to propagate any + // weights, but we do need to propagate the flag. + m_sharedReturnBB->SetFlags(BBF_PROF_WEIGHT); + } + + GenTree* continuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + GenTree* ret = m_comp->gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, continuation); + LIR::AsRange(m_sharedReturnBB).InsertAtEnd(continuation, ret); + + JITDUMP("Created shared return BB " FMT_BB "\n", m_sharedReturnBB->bbNum); + + DISPRANGE(LIR::AsRange(m_sharedReturnBB)); +#endif + + // Compute liveness to be used for determining what must be captured on + // suspension. In unoptimized codegen we capture everything. + if (m_comp->opts.OptimizationEnabled()) + { + if (m_comp->m_dfsTree == nullptr) + { + m_comp->m_dfsTree = m_comp->fgComputeDfs(); + } + + m_comp->lvaComputeRefCounts(true, false); + m_comp->fgLocalVarLiveness(); + VarSetOps::AssignNoCopy(m_comp, m_comp->compCurLife, VarSetOps::MakeEmpty(m_comp)); + } + + AsyncLiveness liveness(m_comp, m_comp->opts.OptimizationEnabled()); + + // Now walk the IR for all the blocks that contain async calls. Keep track + // of liveness and outstanding LIR edges as we go; the LIR edges that cross + // async calls are additional live variables that must be spilled. + jitstd::vector defs(m_comp->getAllocator(CMK_Async)); + + for (int i = 0; i < worklist.Height(); i++) + { + assert(defs.size() == 0); + + BasicBlock* block = worklist.Bottom(i); + liveness.StartBlock(block); + + bool any; + do + { + any = false; + for (GenTree* tree : LIR::AsRange(block)) + { + // Remove all consumed defs; those are no longer 'live' LIR + // edges. 
+ tree->VisitOperands([&defs](GenTree* op) { + if (op->IsValue()) + { + for (size_t i = defs.size(); i > 0; i--) + { + if (op == defs[i - 1]) + { + defs[i - 1] = defs[defs.size() - 1]; + defs.erase(defs.begin() + (defs.size() - 1), defs.end()); + break; + } + } + } + + return GenTree::VisitResult::Continue; + }); + + // Update liveness to reflect state after this node. + liveness.Update(tree); + + if (tree->IsCall() && tree->AsCall()->IsAsync() && !tree->AsCall()->IsTailCall()) + { + // Transform call; continue with the remainder block + Transform(block, tree->AsCall(), defs, liveness, &block); + defs.clear(); + any = true; + break; + } + + // Push a new definition if necessary; this defined value is + // now a live LIR edge. + if (tree->IsValue() && !tree->IsUnusedValue()) + { + defs.push_back(tree); + } + } + } while (any); + } + + // After transforming all async calls we have created resumption blocks; + // create the resumption switch. + CreateResumptionSwitch(); + + m_comp->fgInvalidateDfsTree(); + + return PhaseStatus::MODIFIED_EVERYTHING; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::Transform: +// Transform a single async call in the specified block. 
+// +// Parameters: +// block - The block containing the async call +// call - The async call +// defs - Current live LIR edges +// life - Liveness information about live locals +// remainder - [out] Remainder block after the transformation +// +void AsyncTransformation::Transform( + BasicBlock* block, GenTreeCall* call, jitstd::vector& defs, AsyncLiveness& life, BasicBlock** remainder) +{ +#ifdef DEBUG + if (m_comp->verbose) + { + printf("Processing call [%06u] in " FMT_BB "\n", Compiler::dspTreeID(call), block->bbNum); + printf(" %zu live LIR edges\n", defs.size()); + + if (defs.size() > 0) + { + const char* sep = " "; + for (GenTree* tree : defs) + { + printf("%s[%06u] (%s)", sep, Compiler::dspTreeID(tree), varTypeName(tree->TypeGet())); + sep = ", "; + } + + printf("\n"); + } + } +#endif + + m_liveLocalsScratch.clear(); + jitstd::vector& liveLocals = m_liveLocalsScratch; + + CreateLiveSetForSuspension(block, call, defs, life, liveLocals); + + ContinuationLayout layout = LayOutContinuation(block, call, liveLocals); + + CallDefinitionInfo callDefInfo = CanonicalizeCallDefinition(block, call, life); + + unsigned stateNum = (unsigned)m_resumptionBBs.size(); + JITDUMP(" Assigned state %u\n", stateNum); + + BasicBlock* suspendBB = CreateSuspension(block, stateNum, life, layout); + + CreateCheckAndSuspendAfterCall(block, callDefInfo, life, suspendBB, remainder); + + BasicBlock* resumeBB = CreateResumption(block, *remainder, call, callDefInfo, stateNum, layout); + + m_resumptionBBs.push_back(resumeBB); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateLiveSetForSuspension: +// Create the set of live state to be captured for suspension, for the +// specified call. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// defs - Current live LIR edges +// life - Liveness information about live locals +// liveLocals - Information about each live local. 
+// +void AsyncTransformation::CreateLiveSetForSuspension(BasicBlock* block, + GenTreeCall* call, + const jitstd::vector& defs, + AsyncLiveness& life, + jitstd::vector& liveLocals) +{ + unsigned fullyDefinedRetBufLcl = BAD_VAR_NUM; + CallArg* retbufArg = call->gtArgs.GetRetBufferArg(); + if (retbufArg != nullptr) + { + GenTree* retbuf = retbufArg->GetNode(); + if (retbuf->IsLclVarAddr()) + { + LclVarDsc* dsc = m_comp->lvaGetDesc(retbuf->AsLclVarCommon()); + ClassLayout* defLayout = m_comp->typGetObjLayout(call->gtRetClsHnd); + if (defLayout->GetSize() == dsc->lvExactSize()) + { + // This call fully defines this retbuf. There is no need to + // consider it live across the call since it is going to be + // overridden anyway. + fullyDefinedRetBufLcl = retbuf->AsLclVarCommon()->GetLclNum(); + JITDUMP(" V%02u is a fully defined retbuf and will not be considered live\n", fullyDefinedRetBufLcl); + } + } + } + + life.GetLiveLocals(liveLocals, fullyDefinedRetBufLcl); + LiftLIREdges(block, defs, liveLocals); + +#ifdef DEBUG + if (m_comp->verbose) + { + printf(" %zu live locals\n", liveLocals.size()); + + if (liveLocals.size() > 0) + { + const char* sep = " "; + for (LiveLocalInfo& inf : liveLocals) + { + printf("%sV%02u (%s)", sep, inf.LclNum, varTypeName(m_comp->lvaGetDesc(inf.LclNum)->TypeGet())); + sep = ", "; + } + + printf("\n"); + } + } +#endif +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LiftLIREdges: +// Create locals capturing outstanding LIR edges and add information +// indicating that these locals are live. 
+// +// Parameters: +// block - The block containing the definitions of the LIR edges +// defs - Current outstanding LIR edges +// liveLocals - [out] Vector to add new live local information into +// +void AsyncTransformation::LiftLIREdges(BasicBlock* block, + const jitstd::vector& defs, + jitstd::vector& liveLocals) +{ + if (defs.size() <= 0) + { + return; + } + + for (GenTree* tree : defs) + { + // TODO-CQ: Enable this. It currently breaks our recognition of how the + // call is stored. + // if (tree->OperIs(GT_LCL_VAR)) + //{ + // LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + // if (!dsc->IsAddressExposed()) + // { + // // No interference by IR invariants. + // LIR::AsRange(block).Remove(tree); + // LIR::AsRange(block).InsertAfter(beyond, tree); + // continue; + // } + //} + + LIR::Use use; + bool gotUse = LIR::AsRange(block).TryGetUse(tree, &use); + assert(gotUse); // Defs list should not contain unused values. + + unsigned newLclNum = use.ReplaceWithLclVar(m_comp); + liveLocals.push_back(LiveLocalInfo(newLclNum)); + GenTree* newUse = use.Def(); + LIR::AsRange(block).Remove(newUse); + LIR::AsRange(block).InsertBefore(use.User(), newUse); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LayOutContinuation: +// Create the layout of the GC pointer and data arrays in the continuation +// object. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// liveLocals - [in, out] Information about each live local. Size/alignment +// information is read and offset/index information is written. +// +// Returns: +// Layout information. 
+// +ContinuationLayout AsyncTransformation::LayOutContinuation(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& liveLocals) +{ + ContinuationLayout layout(liveLocals); + + for (LiveLocalInfo& inf : liveLocals) + { + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + if ((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()) + { + ClassLayout* layout = dsc->GetLayout(); + assert(!layout->HasGCByRef()); + + if (layout->IsCustomLayout()) + { + inf.Alignment = 1; + inf.DataSize = layout->GetSize(); + inf.GCDataCount = layout->GetGCPtrCount(); + } + else + { + inf.Alignment = m_comp->info.compCompHnd->getClassAlignmentRequirement(layout->GetClassHandle()); + if ((layout->GetGCPtrCount() * TARGET_POINTER_SIZE) == layout->GetSize()) + { + inf.DataSize = 0; + } + else + { + inf.DataSize = layout->GetSize(); + } + + inf.GCDataCount = layout->GetGCPtrCount(); + } + } + else if (dsc->TypeGet() == TYP_REF) + { + inf.Alignment = TARGET_POINTER_SIZE; + inf.DataSize = 0; + inf.GCDataCount = 1; + } + else + { + assert(dsc->TypeGet() != TYP_BYREF); + + inf.Alignment = genTypeAlignments[dsc->TypeGet()]; + inf.DataSize = genTypeSize(dsc); + inf.GCDataCount = 0; + } + } + + jitstd::sort(liveLocals.begin(), liveLocals.end(), [](const LiveLocalInfo& lhs, const LiveLocalInfo& rhs) { + if (lhs.Alignment == rhs.Alignment) + { + // Prefer lowest local num first for same alignment. + return lhs.LclNum < rhs.LclNum; + } + + // Otherwise prefer highest alignment first. + return lhs.Alignment > rhs.Alignment; + }); + + // For OSR, we store the transition IL offset at the beginning of the data + // (-1 in the tier0 version): + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + { + JITDUMP(" Method %s; keeping an IL offset at the beginning of non-GC data\n", + m_comp->doesMethodHavePatchpoints() ? 
"has patchpoints" : "is an OSR method"); + layout.DataSize += sizeof(int); + } + + if (call->gtReturnType == TYP_STRUCT) + { + layout.ReturnStructLayout = m_comp->typGetObjLayout(call->gtRetClsHnd); + layout.ReturnSize = layout.ReturnStructLayout->GetSize(); + layout.ReturnInGCData = layout.ReturnStructLayout->HasGCPtr(); + } + else + { + layout.ReturnSize = genTypeSize(call->gtReturnType); + layout.ReturnInGCData = varTypeIsGC(call->gtReturnType); + } + + assert((layout.ReturnSize > 0) == (call->gtReturnType != TYP_VOID)); + + // The return value is always stored: + // 1. At index 0 in GCData if it is a TYP_REF or a struct with GC references + // 2. At index 0 in Data, for non OSR methods without GC ref returns + // 3. At index 4 in Data for OSR methods without GC ref returns. The + // continuation flags indicates this scenario with a flag. + if (layout.ReturnInGCData) + { + layout.GCRefsCount++; + } + else if (layout.ReturnSize > 0) + { + layout.ReturnValDataOffset = layout.DataSize; + layout.DataSize += layout.ReturnSize; + } + +#ifdef DEBUG + if (layout.ReturnSize > 0) + { + JITDUMP(" Will store return of type %s, size %u in", + call->gtReturnType == TYP_STRUCT ? 
layout.ReturnStructLayout->GetClassName() + : varTypeName(call->gtReturnType), + layout.ReturnSize); + + if (layout.ReturnInGCData) + { + JITDUMP(" GC data\n"); + } + else + { + JITDUMP(" non-GC data at offset %u\n", layout.ReturnValDataOffset); + } + } +#endif + + if (block->hasTryIndex()) + { + layout.ExceptionGCDataIndex = layout.GCRefsCount++; + JITDUMP(" " FMT_BB " is in try region %u; exception will be at GC@+%02u in GC data\n", block->bbNum, + block->getTryIndex(), layout.ExceptionGCDataIndex); + } + + for (LiveLocalInfo& inf : liveLocals) + { + layout.DataSize = roundUp(layout.DataSize, inf.Alignment); + + inf.DataOffset = layout.DataSize; + inf.GCDataIndex = layout.GCRefsCount; + + layout.DataSize += inf.DataSize; + layout.GCRefsCount += inf.GCDataCount; + } + +#ifdef DEBUG + if (m_comp->verbose) + { + printf(" Continuation layout (%u bytes, %u GC pointers):\n", layout.DataSize, layout.GCRefsCount); + for (LiveLocalInfo& inf : liveLocals) + { + printf(" +%03u (GC@+%02u) V%02u: %u bytes, %u GC pointers\n", inf.DataOffset, inf.GCDataIndex, + inf.LclNum, inf.DataSize, inf.GCDataCount); + } + } +#endif + + return layout; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CanonicalizeCallDefinition: +// Put the call definition in a canonical form. This ensures that either the +// value is defined by a LCL_ADDR retbuffer or by a +// STORE_LCL_VAR/STORE_LCL_FLD that follows the call node. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// life - Liveness information about live locals +// +// Returns: +// Information about the definition after canonicalization. 
+// +CallDefinitionInfo AsyncTransformation::CanonicalizeCallDefinition(BasicBlock* block, + GenTreeCall* call, + AsyncLiveness& life) +{ + CallDefinitionInfo callDefInfo; + + callDefInfo.InsertAfter = call; + + CallArg* retbufArg = call->gtArgs.GetRetBufferArg(); + + if (!call->TypeIs(TYP_VOID) && !call->IsUnusedValue()) + { + assert(retbufArg == nullptr); + assert(call->gtNext != nullptr); + if (!call->gtNext->OperIsLocalStore() || (call->gtNext->Data() != call)) + { + LIR::Use use; + bool gotUse = LIR::AsRange(block).TryGetUse(call, &use); + assert(gotUse); + + use.ReplaceWithLclVar(m_comp); + } + else + { + // We will split after the store, but we still have to update liveness for it. + life.Update(call->gtNext); + } + + assert(call->gtNext->OperIsLocalStore() && (call->gtNext->Data() == call)); + callDefInfo.DefinitionNode = call->gtNext->AsLclVarCommon(); + callDefInfo.InsertAfter = call->gtNext; + } + + if (retbufArg != nullptr) + { + assert(call->TypeIs(TYP_VOID)); + + // For async methods we always expect retbufs to point to locals. We + // ensure this in impStoreStruct. + noway_assert(retbufArg->GetNode()->OperIs(GT_LCL_ADDR)); + + callDefInfo.DefinitionNode = retbufArg->GetNode()->AsLclVarCommon(); + } + + return callDefInfo; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateSuspension: +// Create the basic block that when branched to suspends execution after the +// specified async call. +// +// Parameters: +// block - The block containing the async call +// stateNum - State number assigned to this suspension point +// life - Liveness information about live locals +// layout - Layout information for the continuation object +// +// Returns: +// The new basic block that was created. 
+// +BasicBlock* AsyncTransformation::CreateSuspension(BasicBlock* block, + unsigned stateNum, + AsyncLiveness& life, + const ContinuationLayout& layout) +{ + if (m_lastSuspensionBB == nullptr) + { + m_lastSuspensionBB = m_comp->fgLastBBInMainFunction(); + } + + BasicBlock* suspendBB = m_comp->fgNewBBafter(BBJ_RETURN, m_lastSuspensionBB, false); + suspendBB->clearTryIndex(); + suspendBB->clearHndIndex(); + suspendBB->inheritWeightPercentage(block, 0); + m_lastSuspensionBB = suspendBB; + + if (m_sharedReturnBB != nullptr) + { + suspendBB->SetKindAndTargetEdge(BBJ_ALWAYS, m_comp->fgAddRefPred(m_sharedReturnBB, suspendBB)); + } + + JITDUMP(" Creating suspension " FMT_BB " for state %u\n", suspendBB->bbNum, stateNum); + + // Allocate continuation + GenTree* returnedContinuation = m_comp->gtNewLclvNode(m_returnedContinuationVar, TYP_REF); + + GenTreeCall* allocContinuation = + CreateAllocContinuationCall(life, returnedContinuation, layout.GCRefsCount, layout.DataSize); + + m_comp->compCurBB = suspendBB; + m_comp->fgMorphTree(allocContinuation); + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, allocContinuation)); + + GenTree* storeNewContinuation = m_comp->gtNewStoreLclVarNode(m_newContinuationVar, allocContinuation); + LIR::AsRange(suspendBB).InsertAtEnd(storeNewContinuation); + + // Fill in 'Resume' + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned resumeOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationResumeFldHnd); + GenTree* resumeStubAddr = CreateResumptionStubAddrTree(); + GenTree* storeResume = StoreAtOffset(newContinuation, resumeOffset, resumeStubAddr, TYP_I_IMPL); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResume)); + + // Fill in 'state' + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned stateOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd); + GenTree* stateNumNode = 
m_comp->gtNewIconNode((ssize_t)stateNum, TYP_INT); + GenTree* storeState = StoreAtOffset(newContinuation, stateOffset, stateNumNode, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeState)); + + // Fill in 'flags' + unsigned continuationFlags = 0; + if (layout.ReturnInGCData) + continuationFlags |= CORINFO_CONTINUATION_RESULT_IN_GCDATA; + if (block->hasTryIndex()) + continuationFlags |= CORINFO_CONTINUATION_NEEDS_EXCEPTION; + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + continuationFlags |= CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA; + + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned flagsOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationFlagsFldHnd); + GenTree* flagsNode = m_comp->gtNewIconNode((ssize_t)continuationFlags, TYP_INT); + GenTree* storeFlags = StoreAtOffset(newContinuation, flagsOffset, flagsNode, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeFlags)); + + if (layout.GCRefsCount > 0) + { + FillInGCPointersOnSuspension(layout.Locals, suspendBB); + } + + if (layout.DataSize > 0) + { + FillInDataOnSuspension(layout.Locals, suspendBB); + } + + if (suspendBB->KindIs(BBJ_RETURN)) + { + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + GenTree* ret = m_comp->gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, newContinuation); + LIR::AsRange(suspendBB).InsertAtEnd(newContinuation, ret); + } + + return suspendBB; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateAllocContinuationCall: +// Create a call to the JIT helper that allocates a continuation. 
+// +// Parameters: +// life - Liveness information about live locals +// prevContinuation - IR node that has the value of the previous continuation object +// gcRefsCount - Number of GC refs to allocate in the continuation object +// dataSize - Number of bytes to allocate in the continuation object +// +// Returns: +// IR node representing the allocation. +// +GenTreeCall* AsyncTransformation::CreateAllocContinuationCall(AsyncLiveness& life, + GenTree* prevContinuation, + unsigned gcRefsCount, + unsigned dataSize) +{ + GenTree* gcRefsCountNode = m_comp->gtNewIconNode((ssize_t)gcRefsCount, TYP_I_IMPL); + GenTree* dataSizeNode = m_comp->gtNewIconNode((ssize_t)dataSize, TYP_I_IMPL); + // If VM requests that we report the method handle, or if we have a shared generic context method handle + // that is live here, then we need to call a different helper to keep the loader alive. + GenTree* methodHandleArg = nullptr; + GenTree* classHandleArg = nullptr; + if (((m_comp->info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_METHODDESC) != 0) && + life.IsLive(m_comp->info.compTypeCtxtArg)) + { + methodHandleArg = m_comp->gtNewLclvNode(m_comp->info.compTypeCtxtArg, TYP_I_IMPL); + } + else if (((m_comp->info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_METHODTABLE) != 0) && + life.IsLive(m_comp->info.compTypeCtxtArg)) + { + classHandleArg = m_comp->gtNewLclvNode(m_comp->info.compTypeCtxtArg, TYP_I_IMPL); + } + else if (m_asyncInfo.continuationsNeedMethodHandle) + { + methodHandleArg = m_comp->gtNewIconEmbMethHndNode(m_comp->info.compMethodHnd); + } + + if (methodHandleArg != nullptr) + { + return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_METHOD, TYP_REF, prevContinuation, + gcRefsCountNode, dataSizeNode, methodHandleArg); + } + + if (classHandleArg != nullptr) + { + return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_CLASS, TYP_REF, prevContinuation, + gcRefsCountNode, dataSizeNode, classHandleArg); + } + + return 
m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION, TYP_REF, prevContinuation, gcRefsCountNode, + dataSizeNode); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::FillInGCPointersOnSuspension: +// Create IR that fills the GC pointers of the continuation object. +// This also nulls out the GC pointers in the locals if the local has data +// parts that need to be stored. +// +// Parameters: +// liveLocals - Information about each live local. +// suspendBB - Basic block to add IR to. +// +void AsyncTransformation::FillInGCPointersOnSuspension(const jitstd::vector& liveLocals, + BasicBlock* suspendBB) +{ + unsigned objectArrLclNum = GetGCDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned gcDataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationGCDataFldHnd); + GenTree* gcDataInd = LoadFromOffset(newContinuation, gcDataOffset, TYP_REF); + GenTree* storeAllocedObjectArr = m_comp->gtNewStoreLclVarNode(objectArrLclNum, gcDataInd); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedObjectArr)); + + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.GCDataCount <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + if (dsc->TypeGet() == TYP_REF) + { + GenTree* value = m_comp->gtNewLclvNode(inf.LclNum, TYP_REF); + GenTree* objectArr = m_comp->gtNewLclvNode(objectArrLclNum, TYP_REF); + GenTree* store = + StoreAtOffset(objectArr, OFFSETOF__CORINFO_Array__data + (inf.GCDataIndex * TARGET_POINTER_SIZE), value, + TYP_REF); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + else + { + assert((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()); + ClassLayout* layout = dsc->GetLayout(); + unsigned numSlots = layout->GetSlotCount(); + unsigned gcRefIndex = 0; + for (unsigned i = 0; i < numSlots; i++) + { + var_types gcPtrType = layout->GetGCPtrType(i); + 
assert((gcPtrType == TYP_I_IMPL) || (gcPtrType == TYP_REF)); + if (gcPtrType != TYP_REF) + { + continue; + } + + GenTree* value; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + value = LoadFromOffset(baseAddr, i * TARGET_POINTER_SIZE, TYP_REF); + } + else + { + value = m_comp->gtNewLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE); + } + + GenTree* objectArr = m_comp->gtNewLclvNode(objectArrLclNum, TYP_REF); + unsigned offset = + OFFSETOF__CORINFO_Array__data + ((inf.GCDataIndex + gcRefIndex) * TARGET_POINTER_SIZE); + GenTree* store = StoreAtOffset(objectArr, offset, value, TYP_REF); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + gcRefIndex++; + + if (inf.DataSize > 0) + { + // Null out the GC field in preparation of storing the rest. + GenTree* null = m_comp->gtNewNull(); + + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = StoreAtOffset(baseAddr, i * TARGET_POINTER_SIZE, null, TYP_REF); + } + else + { + store = m_comp->gtNewStoreLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE, null); + } + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + } + + if (!dsc->IsImplicitByRef()) + { + m_comp->lvaSetVarDoNotEnregister(inf.LclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + } + } + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::FillInDataOnSuspension: +// Create IR that fills the data array of the continuation object. +// +// Parameters: +// liveLocals - Information about each live local. +// suspendBB - Basic block to add IR to. 
+// +void AsyncTransformation::FillInDataOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB) +{ + unsigned byteArrLclNum = GetDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned dataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataInd = LoadFromOffset(newContinuation, dataOffset, TYP_REF); + GenTree* storeAllocedByteArr = m_comp->gtNewStoreLclVarNode(byteArrLclNum, dataInd); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedByteArr)); + + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + { + GenTree* ilOffsetToStore; + if (m_comp->doesMethodHavePatchpoints()) + ilOffsetToStore = m_comp->gtNewIconNode(-1); + else + ilOffsetToStore = m_comp->gtNewIconNode((int)m_comp->info.compILEntry); + + GenTree* byteArr = m_comp->gtNewLclvNode(byteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data; + GenTree* storePatchpointOffset = StoreAtOffset(byteArr, offset, ilOffsetToStore, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storePatchpointOffset)); + } + + // Fill in data + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.DataSize <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + GenTree* byteArr = m_comp->gtNewLclvNode(byteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + inf.DataOffset; + + GenTree* value; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + value = m_comp->gtNewBlkIndir(dsc->GetLayout(), baseAddr, GTF_IND_NONFAULTING); + } + else + { + value = m_comp->gtNewLclVarNode(inf.LclNum); + } + + GenTree* store; + if ((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()) + { + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + GenTree* addr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, byteArr, cns); + // This is to heap, 
but all GC refs are nulled out already, so we can skip the write barrier. + // TODO-CQ: Backend does not care about GTF_IND_TGT_NOT_HEAP for STORE_BLK. + store = + m_comp->gtNewStoreBlkNode(dsc->GetLayout(), addr, value, GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP); + } + else + { + store = StoreAtOffset(byteArr, offset, value, dsc->TypeGet()); + } + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateCheckAndSuspendAfterCall: +// Split the block containing the specified async call, and create the IR +// that checks whether suspension should be done after an async call. +// +// Parameters: +// block - The block containing the async call +// callDefInfo - Information about the async call's definition +// life - Liveness information about live locals +// suspendBB - Basic block to add IR to +// remainder - [out] The remainder block containing the IR that was after the async call. 
+// +void AsyncTransformation::CreateCheckAndSuspendAfterCall(BasicBlock* block, + const CallDefinitionInfo& callDefInfo, + AsyncLiveness& life, + BasicBlock* suspendBB, + BasicBlock** remainder) +{ + GenTree* continuationArg = new (m_comp, GT_ASYNC_CONTINUATION) GenTree(GT_ASYNC_CONTINUATION, TYP_REF); + continuationArg->SetHasOrderingSideEffect(); + + GenTree* storeContinuation = m_comp->gtNewStoreLclVarNode(m_returnedContinuationVar, continuationArg); + LIR::AsRange(block).InsertAfter(callDefInfo.InsertAfter, continuationArg, storeContinuation); + + GenTree* null = m_comp->gtNewNull(); + GenTree* returnedContinuation = m_comp->gtNewLclvNode(m_returnedContinuationVar, TYP_REF); + GenTree* neNull = m_comp->gtNewOperNode(GT_NE, TYP_INT, returnedContinuation, null); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull); + + LIR::AsRange(block).InsertAfter(storeContinuation, null, returnedContinuation, neNull, jtrue); + *remainder = m_comp->fgSplitBlockAfterNode(block, jtrue); + JITDUMP(" Remainder is " FMT_BB "\n", (*remainder)->bbNum); + + FlowEdge* retBBEdge = m_comp->fgAddRefPred(suspendBB, block); + block->SetCond(retBBEdge, block->GetTargetEdge()); + + block->GetTrueEdge()->setLikelihood(0); + block->GetFalseEdge()->setLikelihood(1); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateResumption: +// Create the basic block that when branched to resumes execution on entry to +// the function. +// +// Parameters: +// block - The block containing the async call +// remainder - The block that contains the IR after the (split) async call +// call - The async call +// callDefInfo - Information about the async call's definition +// stateNum - State number assigned to this suspension point +// layout - Layout information for the continuation object +// +// Returns: +// The new basic block that was created. 
+// +BasicBlock* AsyncTransformation::CreateResumption(BasicBlock* block, + BasicBlock* remainder, + GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned stateNum, + const ContinuationLayout& layout) +{ + if (m_lastResumptionBB == nullptr) + { + m_lastResumptionBB = m_comp->fgLastBBInMainFunction(); + } + + BasicBlock* resumeBB = m_comp->fgNewBBafter(BBJ_ALWAYS, m_lastResumptionBB, true); + FlowEdge* remainderEdge = m_comp->fgAddRefPred(remainder, resumeBB); + + // It does not really make sense to inherit from the target, but given this + // is always 0% this just propagates the profile weight flag + sets + // BBF_RUN_RARELY. + resumeBB->inheritWeightPercentage(remainder, 0); + resumeBB->SetTargetEdge(remainderEdge); + resumeBB->clearTryIndex(); + resumeBB->clearHndIndex(); + resumeBB->SetFlags(BBF_ASYNC_RESUMPTION); + m_lastResumptionBB = resumeBB; + + JITDUMP(" Creating resumption " FMT_BB " for state %u\n", resumeBB->bbNum, stateNum); + + unsigned resumeByteArrLclNum = BAD_VAR_NUM; + if (layout.DataSize > 0) + { + resumeByteArrLclNum = GetDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned dataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataInd = LoadFromOffset(newContinuation, dataOffset, TYP_REF); + GenTree* storeAllocedByteArr = m_comp->gtNewStoreLclVarNode(resumeByteArrLclNum, dataInd); + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedByteArr)); + + RestoreFromDataOnResumption(resumeByteArrLclNum, layout.Locals, resumeBB); + } + + unsigned resumeObjectArrLclNum = BAD_VAR_NUM; + BasicBlock* storeResultBB = resumeBB; + + if (layout.GCRefsCount > 0) + { + resumeObjectArrLclNum = GetGCDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned gcDataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationGCDataFldHnd); 
+ GenTree* gcDataInd = LoadFromOffset(newContinuation, gcDataOffset, TYP_REF); + GenTree* storeAllocedObjectArr = m_comp->gtNewStoreLclVarNode(resumeObjectArrLclNum, gcDataInd); + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedObjectArr)); + + RestoreFromGCPointersOnResumption(resumeObjectArrLclNum, layout.Locals, resumeBB); + + if (layout.ExceptionGCDataIndex != UINT_MAX) + { + storeResultBB = RethrowExceptionOnResumption(block, remainder, resumeObjectArrLclNum, layout, resumeBB); + } + } + + // Copy call return value. + if (layout.ReturnSize > 0) + { + CopyReturnValueOnResumption(call, callDefInfo, resumeByteArrLclNum, resumeObjectArrLclNum, layout, + storeResultBB); + } + + return resumeBB; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::RestoreFromDataOnResumption: +// Create IR that restores locals from the data array of the continuation +// object. +// +// Parameters: +// resumeByteArrLclNum - Local that has the continuation object's data array +// liveLocals - Information about each live local. 
+// resumeBB - Basic block to append IR to +// +void AsyncTransformation::RestoreFromDataOnResumption(unsigned resumeByteArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB) +{ + // Copy data + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.DataSize <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + GenTree* byteArr = m_comp->gtNewLclvNode(resumeByteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + inf.DataOffset; + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + GenTree* addr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, byteArr, cns); + + GenTree* value; + if ((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()) + { + value = m_comp->gtNewBlkIndir(dsc->GetLayout(), addr, GTF_IND_NONFAULTING); + } + else + { + value = m_comp->gtNewIndir(dsc->TypeGet(), addr, GTF_IND_NONFAULTING); + } + + GenTree* store; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = m_comp->gtNewStoreBlkNode(dsc->GetLayout(), baseAddr, value, + GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP); + } + else + { + store = m_comp->gtNewStoreLclVarNode(inf.LclNum, value); + } + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::RestoreFromGCPointersOnResumption: +// Create IR that restores locals from the GC pointers array of the +// continuation object. +// +// Parameters: +// resumeObjectArrLclNum - Local that has the continuation object's GC pointers array +// liveLocals - Information about each live local. 
+// resumeBB - Basic block to append IR to +// +void AsyncTransformation::RestoreFromGCPointersOnResumption(unsigned resumeObjectArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB) +{ + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.GCDataCount <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + if (dsc->TypeGet() == TYP_REF) + { + GenTree* objectArr = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + (inf.GCDataIndex * TARGET_POINTER_SIZE); + GenTree* value = LoadFromOffset(objectArr, offset, TYP_REF); + GenTree* store = m_comp->gtNewStoreLclVarNode(inf.LclNum, value); + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + else + { + assert((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()); + ClassLayout* layout = dsc->GetLayout(); + unsigned numSlots = layout->GetSlotCount(); + unsigned gcRefIndex = 0; + for (unsigned i = 0; i < numSlots; i++) + { + var_types gcPtrType = layout->GetGCPtrType(i); + assert((gcPtrType == TYP_I_IMPL) || (gcPtrType == TYP_REF)); + if (gcPtrType != TYP_REF) + { + continue; + } + + GenTree* objectArr = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + unsigned offset = + OFFSETOF__CORINFO_Array__data + ((inf.GCDataIndex + gcRefIndex) * TARGET_POINTER_SIZE); + GenTree* value = LoadFromOffset(objectArr, offset, TYP_REF); + GenTree* store; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = StoreAtOffset(baseAddr, i * TARGET_POINTER_SIZE, value, TYP_REF); + // Implicit byref args are never on heap + store->gtFlags |= GTF_IND_TGT_NOT_HEAP; + } + else + { + store = m_comp->gtNewStoreLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE, value); + } + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + gcRefIndex++; + } + } + } +} + +//------------------------------------------------------------------------ 
+// AsyncTransformation::RethrowExceptionOnResumption: +// Create IR that checks for an exception and rethrows it at the original +// suspension point if necessary. +// +// Parameters: +// block - The block containing the async call +// remainder - The block that contains the IR after the (split) async call +// resumeObjectArrLclNum - Local that has the continuation object's GC pointers array +// layout - Layout information for the continuation object +// resumeBB - Basic block to append IR to +// +// Returns: +// The new non-exception successor basic block for resumption. This is the +// basic block where execution will continue if there was no exception to +// rethrow. +// +BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock* block, + BasicBlock* remainder, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* resumeBB) +{ + JITDUMP(" We need to rethrow an exception\n"); + + BasicBlock* rethrowExceptionBB = + m_comp->fgNewBBinRegion(BBJ_THROW, block, /* runRarely */ true, /* insertAtEnd */ true); + JITDUMP(" Created " FMT_BB " to rethrow exception on resumption\n", rethrowExceptionBB->bbNum); + + BasicBlock* storeResultBB = m_comp->fgNewBBafter(BBJ_ALWAYS, resumeBB, true); + JITDUMP(" Created " FMT_BB " to store result when resuming with no exception\n", storeResultBB->bbNum); + + FlowEdge* rethrowEdge = m_comp->fgAddRefPred(rethrowExceptionBB, resumeBB); + FlowEdge* storeResultEdge = m_comp->fgAddRefPred(storeResultBB, resumeBB); + + assert(resumeBB->KindIs(BBJ_ALWAYS)); + m_comp->fgRemoveRefPred(resumeBB->GetTargetEdge()); + + resumeBB->SetCond(rethrowEdge, storeResultEdge); + rethrowEdge->setLikelihood(0); + storeResultEdge->setLikelihood(1); + rethrowExceptionBB->inheritWeightPercentage(resumeBB, 0); + storeResultBB->inheritWeightPercentage(resumeBB, 100); + JITDUMP(" Resumption " FMT_BB " becomes BBJ_COND to check for non-null exception\n", resumeBB->bbNum); + + FlowEdge* remainderEdge = 
m_comp->fgAddRefPred(remainder, storeResultBB); + storeResultBB->SetTargetEdge(remainderEdge); + + m_lastResumptionBB = storeResultBB; + + // Check if we have an exception. + unsigned exceptionLclNum = GetExceptionVar(); + GenTree* objectArr = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + unsigned exceptionOffset = OFFSETOF__CORINFO_Array__data + layout.ExceptionGCDataIndex * TARGET_POINTER_SIZE; + GenTree* exceptionInd = LoadFromOffset(objectArr, exceptionOffset, TYP_REF); + GenTree* storeException = m_comp->gtNewStoreLclVarNode(exceptionLclNum, exceptionInd); + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeException)); + + GenTree* exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); + GenTree* null = m_comp->gtNewNull(); + GenTree* neNull = m_comp->gtNewOperNode(GT_NE, TYP_INT, exception, null); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull); + LIR::AsRange(resumeBB).InsertAtEnd(exception, null, neNull, jtrue); + + exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); + GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception); + + m_comp->compCurBB = rethrowExceptionBB; + m_comp->fgMorphTree(rethrowException); + + LIR::AsRange(rethrowExceptionBB).InsertAtEnd(LIR::SeqTree(m_comp, rethrowException)); + + storeResultBB->SetFlags(BBF_ASYNC_RESUMPTION); + JITDUMP(" Added " FMT_BB " to rethrow exception at suspension point\n", rethrowExceptionBB->bbNum); + + return storeResultBB; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CopyReturnValueOnResumption: +// Create IR that copies the return value from the continuation object to the +// right local. 
+//
+// Parameters:
+//   call                  - The async call
+//   callDefInfo           - Information about the async call's definition
+//   resumeByteArrLclNum   - Local that has the continuation object's data array
+//   resumeObjectArrLclNum - Local that has the continuation object's GC pointers array
+//   layout                - Layout information for the continuation object
+//   storeResultBB         - Basic block to append IR to
+//
+void AsyncTransformation::CopyReturnValueOnResumption(GenTreeCall*              call,
+                                                      const CallDefinitionInfo& callDefInfo,
+                                                      unsigned                  resumeByteArrLclNum,
+                                                      unsigned                  resumeObjectArrLclNum,
+                                                      const ContinuationLayout& layout,
+                                                      BasicBlock*               storeResultBB)
+{
+    GenTree*     resultBase;
+    unsigned     resultOffset;
+    GenTreeFlags resultIndirFlags = GTF_IND_NONFAULTING;
+    if (layout.ReturnInGCData)
+    {
+        assert(resumeObjectArrLclNum != BAD_VAR_NUM);
+        resultBase = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF);
+
+        if (call->gtReturnType == TYP_STRUCT)
+        {
+            // Boxed struct.
+            resultBase   = LoadFromOffset(resultBase, OFFSETOF__CORINFO_Array__data, TYP_REF);
+            resultOffset = TARGET_POINTER_SIZE; // Offset of data inside box
+        }
+        else
+        {
+            assert(call->gtReturnType == TYP_REF);
+            resultOffset = OFFSETOF__CORINFO_Array__data;
+        }
+    }
+    else
+    {
+        assert(resumeByteArrLclNum != BAD_VAR_NUM);
+        resultBase   = m_comp->gtNewLclvNode(resumeByteArrLclNum, TYP_REF);
+        resultOffset = OFFSETOF__CORINFO_Array__data + layout.ReturnValDataOffset;
+        if (layout.ReturnValDataOffset != 0)
+            resultIndirFlags = GTF_IND_UNALIGNED;
+    }
+
+    assert(callDefInfo.DefinitionNode != nullptr);
+    LclVarDsc* resultLcl = m_comp->lvaGetDesc(callDefInfo.DefinitionNode);
+    assert((resultLcl->TypeGet() == TYP_STRUCT) == (call->gtReturnType == TYP_STRUCT));
+
+    // TODO-TP: We can use liveness to avoid generating a lot of this IR.
+ if (call->gtReturnType == TYP_STRUCT) + { + if (m_comp->lvaGetPromotionType(resultLcl) != Compiler::PROMOTION_TYPE_INDEPENDENT) + { + GenTree* resultOffsetNode = m_comp->gtNewIconNode((ssize_t)resultOffset, TYP_I_IMPL); + GenTree* resultAddr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, resultBase, resultOffsetNode); + GenTree* resultData = m_comp->gtNewBlkIndir(layout.ReturnStructLayout, resultAddr, resultIndirFlags); + GenTree* storeResult; + if ((callDefInfo.DefinitionNode->GetLclOffs() == 0) && + ClassLayout::AreCompatible(resultLcl->GetLayout(), layout.ReturnStructLayout)) + { + storeResult = m_comp->gtNewStoreLclVarNode(callDefInfo.DefinitionNode->GetLclNum(), resultData); + } + else + { + storeResult = m_comp->gtNewStoreLclFldNode(callDefInfo.DefinitionNode->GetLclNum(), TYP_STRUCT, + layout.ReturnStructLayout, + callDefInfo.DefinitionNode->GetLclOffs(), resultData); + } + + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResult)); + } + else + { + assert(!call->gtArgs.HasRetBuffer()); // Locals defined through retbufs are never independently promoted. 
+ + if ((resultLcl->lvFieldCnt > 1) && !resultBase->OperIsLocal()) + { + unsigned resultBaseVar = GetResultBaseVar(); + GenTree* storeResultBase = m_comp->gtNewStoreLclVarNode(resultBaseVar, resultBase); + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResultBase)); + + resultBase = m_comp->gtNewLclVarNode(resultBaseVar, TYP_REF); + } + + assert(callDefInfo.DefinitionNode->OperIs(GT_STORE_LCL_VAR)); + for (unsigned i = 0; i < resultLcl->lvFieldCnt; i++) + { + unsigned fieldLclNum = resultLcl->lvFieldLclStart + i; + LclVarDsc* fieldDsc = m_comp->lvaGetDesc(fieldLclNum); + + unsigned fldOffset = resultOffset + fieldDsc->lvFldOffset; + GenTree* value = LoadFromOffset(resultBase, fldOffset, fieldDsc->TypeGet(), resultIndirFlags); + GenTree* store = m_comp->gtNewStoreLclVarNode(fieldLclNum, value); + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + if (i + 1 != resultLcl->lvFieldCnt) + { + resultBase = m_comp->gtCloneExpr(resultBase); + } + } + } + } + else + { + GenTree* value = LoadFromOffset(resultBase, resultOffset, call->gtReturnType, resultIndirFlags); + + GenTree* storeResult; + if (callDefInfo.DefinitionNode->OperIs(GT_STORE_LCL_VAR)) + { + storeResult = m_comp->gtNewStoreLclVarNode(callDefInfo.DefinitionNode->GetLclNum(), value); + } + else + { + storeResult = m_comp->gtNewStoreLclFldNode(callDefInfo.DefinitionNode->GetLclNum(), + callDefInfo.DefinitionNode->TypeGet(), + callDefInfo.DefinitionNode->GetLclOffs(), value); + } + + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResult)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LoadFromOffset: +// Create a load. +// +// Parameters: +// base - Base address of the load +// offset - Offset to add on top of the base address +// type - Type of the load to create +// indirFlags - Flags to add to the load +// +// Returns: +// IR node of the load. 
+//
+GenTreeIndir* AsyncTransformation::LoadFromOffset(GenTree*     base,
+                                                  unsigned     offset,
+                                                  var_types    type,
+                                                  GenTreeFlags indirFlags)
+{
+    assert(base->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL));
+    GenTree*      cns      = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL);
+    var_types     addrType = base->TypeIs(TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF;
+    GenTree*      addr     = m_comp->gtNewOperNode(GT_ADD, addrType, base, cns);
+    GenTreeIndir* load     = m_comp->gtNewIndir(type, addr, indirFlags);
+    return load;
+}
+
+//------------------------------------------------------------------------
+// AsyncTransformation::StoreAtOffset:
+//   Create a store.
+//
+// Parameters:
+//   base      - Base address of the store
+//   offset    - Offset to add on top of the base address
+//   value     - Value to store
+//   storeType - Type of store
+//
+// Returns:
+//   IR node of the store.
+//
+GenTreeStoreInd* AsyncTransformation::StoreAtOffset(GenTree* base, unsigned offset, GenTree* value, var_types storeType)
+{
+    assert(base->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL));
+    GenTree*         cns      = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL);
+    var_types        addrType = base->TypeIs(TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF;
+    GenTree*         addr     = m_comp->gtNewOperNode(GT_ADD, addrType, base, cns);
+    GenTreeStoreInd* store    = m_comp->gtNewStoreIndNode(storeType, addr, value, GTF_IND_NONFAULTING);
+    return store;
+}
+
+//------------------------------------------------------------------------
+// AsyncTransformation::GetDataArrayVar:
+//   Create a new local to hold the data array of the continuation object. This
+//   local can be validly used for the entire suspension point; the returned
+//   local may be used by multiple suspension points.
+//
+// Returns:
+//   Local number.
+//
+unsigned AsyncTransformation::GetDataArrayVar()
+{
+    // Create separate locals unless we have many locals in the method for live
+    // range splitting purposes. This helps LSRA to avoid creating additional
+    // callee saves that harm the prolog/epilog.
+ if ((m_dataArrayVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_dataArrayVar = m_comp->lvaGrabTemp(false DEBUGARG("byte[] for continuation")); + m_comp->lvaGetDesc(m_dataArrayVar)->lvType = TYP_REF; + } + + return m_dataArrayVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetGCDataArrayVar: +// Create a new local to hold the GC pointers array of the continuation +// object. This local can be validly used for the entire suspension point; +// the returned local may be used by multiple suspension points. +// +// Returns: +// Local number. +// +unsigned AsyncTransformation::GetGCDataArrayVar() +{ + if ((m_gcDataArrayVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_gcDataArrayVar = m_comp->lvaGrabTemp(false DEBUGARG("object[] for continuation")); + m_comp->lvaGetDesc(m_gcDataArrayVar)->lvType = TYP_REF; + } + + return m_gcDataArrayVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetResultBaseVar: +// Create a new local to hold the base address of the incoming result from +// the continuation. This local can be validly used for the entire suspension +// point; the returned local may be used by multiple suspension points. +// +// Returns: +// Local number. +// +unsigned AsyncTransformation::GetResultBaseVar() +{ + if ((m_resultBaseVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_resultBaseVar = m_comp->lvaGrabTemp(false DEBUGARG("object for resuming result base")); + m_comp->lvaGetDesc(m_resultBaseVar)->lvType = TYP_REF; + } + + return m_resultBaseVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetExceptionVar: +// Create a new local to hold the exception in the continuation. This +// local can be validly used for the entire suspension point; the returned +// local may be used by multiple suspension points. +// +// Returns: +// Local number. 
+// +unsigned AsyncTransformation::GetExceptionVar() +{ + if ((m_exceptionVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_exceptionVar = m_comp->lvaGrabTemp(false DEBUGARG("object for resuming exception")); + m_comp->lvaGetDesc(m_exceptionVar)->lvType = TYP_REF; + } + + return m_exceptionVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateResumptionStubAddrTree: +// Create a tree that represents the address of the resumption stub entry +// point. +// +// Returns: +// IR node. +// +GenTree* AsyncTransformation::CreateResumptionStubAddrTree() +{ + switch (m_resumeStubLookup.accessType) + { + case IAT_VALUE: + { + return CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + } + case IAT_PVALUE: + { + GenTree* tree = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + tree = m_comp->gtNewIndir(TYP_I_IMPL, tree, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + return tree; + } + case IAT_PPVALUE: + { + noway_assert(!"Unexpected IAT_PPVALUE"); + return nullptr; + } + case IAT_RELPVALUE: + { + GenTree* addr = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + GenTree* tree = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + tree = m_comp->gtNewIndir(TYP_I_IMPL, tree, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + tree = m_comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, tree, addr); + return tree; + } + default: + { + noway_assert(!"Bad accessType"); + return nullptr; + } + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateFunctionTargetAddr: +// Create a tree that represents the address of the resumption stub entry +// point. +// +// Returns: +// IR node. 
+// +GenTree* AsyncTransformation::CreateFunctionTargetAddr(CORINFO_METHOD_HANDLE methHnd, + const CORINFO_CONST_LOOKUP& lookup) +{ + GenTree* con = m_comp->gtNewIconHandleNode((size_t)lookup.addr, GTF_ICON_FTN_ADDR); + INDEBUG(con->AsIntCon()->gtTargetHandle = (size_t)methHnd); + return con; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateResumptionSwitch: +// Create the IR for the entry of the function that checks the continuation +// and dispatches on its state number. +// +void AsyncTransformation::CreateResumptionSwitch() +{ + m_comp->fgCreateNewInitBB(); + BasicBlock* newEntryBB = m_comp->fgFirstBB; + + GenTree* continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + GenTree* null = m_comp->gtNewNull(); + GenTree* neNull = m_comp->gtNewOperNode(GT_NE, TYP_INT, continuationArg, null); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull); + LIR::AsRange(newEntryBB).InsertAtEnd(continuationArg, null, neNull, jtrue); + + FlowEdge* resumingEdge; + + if (m_resumptionBBs.size() == 1) + { + JITDUMP(" Redirecting entry " FMT_BB " directly to " FMT_BB " as it is the only resumption block\n", + newEntryBB->bbNum, m_resumptionBBs[0]->bbNum); + resumingEdge = m_comp->fgAddRefPred(m_resumptionBBs[0], newEntryBB); + } + else if (m_resumptionBBs.size() == 2) + { + BasicBlock* condBB = m_comp->fgNewBBbefore(BBJ_COND, m_resumptionBBs[0], true); + condBB->inheritWeightPercentage(newEntryBB, 0); + + FlowEdge* to0 = m_comp->fgAddRefPred(m_resumptionBBs[0], condBB); + FlowEdge* to1 = m_comp->fgAddRefPred(m_resumptionBBs[1], condBB); + condBB->SetCond(to1, to0); + to1->setLikelihood(0.5); + to0->setLikelihood(0.5); + + resumingEdge = m_comp->fgAddRefPred(condBB, newEntryBB); + + JITDUMP(" Redirecting entry " FMT_BB " to BBJ_COND " FMT_BB " for resumption with 2 states\n", + newEntryBB->bbNum, condBB->bbNum); + + continuationArg = 
m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned stateOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd); + GenTree* stateOffsetNode = m_comp->gtNewIconNode((ssize_t)stateOffset, TYP_I_IMPL); + GenTree* stateAddr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, continuationArg, stateOffsetNode); + GenTree* stateInd = m_comp->gtNewIndir(TYP_INT, stateAddr, GTF_IND_NONFAULTING); + GenTree* zero = m_comp->gtNewZeroConNode(TYP_INT); + GenTree* stateNeZero = m_comp->gtNewOperNode(GT_NE, TYP_INT, stateInd, zero); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, stateNeZero); + + LIR::AsRange(condBB).InsertAtEnd(continuationArg, stateOffsetNode, stateAddr, stateInd, zero, stateNeZero, + jtrue); + } + else + { + BasicBlock* switchBB = m_comp->fgNewBBbefore(BBJ_SWITCH, m_resumptionBBs[0], true); + switchBB->inheritWeightPercentage(newEntryBB, 0); + + resumingEdge = m_comp->fgAddRefPred(switchBB, newEntryBB); + + JITDUMP(" Redirecting entry " FMT_BB " to BBJ_SWITCH " FMT_BB " for resumption with %zu states\n", + newEntryBB->bbNum, switchBB->bbNum, m_resumptionBBs.size()); + + continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned stateOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd); + GenTree* stateOffsetNode = m_comp->gtNewIconNode((ssize_t)stateOffset, TYP_I_IMPL); + GenTree* stateAddr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, continuationArg, stateOffsetNode); + GenTree* stateInd = m_comp->gtNewIndir(TYP_INT, stateAddr, GTF_IND_NONFAULTING); + GenTree* switchNode = m_comp->gtNewOperNode(GT_SWITCH, TYP_VOID, stateInd); + + LIR::AsRange(switchBB).InsertAtEnd(continuationArg, stateOffsetNode, stateAddr, stateInd, switchNode); + + m_comp->fgHasSwitch = true; + + // Default case. TODO-CQ: Support bbsHasDefault = false before lowering. 
+ m_resumptionBBs.push_back(m_resumptionBBs[0]); + BBswtDesc* swtDesc = new (m_comp, CMK_BasicBlock) BBswtDesc; + swtDesc->bbsCount = (unsigned)m_resumptionBBs.size(); + swtDesc->bbsHasDefault = true; + swtDesc->bbsDstTab = new (m_comp, CMK_Async) FlowEdge*[m_resumptionBBs.size()]; + + weight_t stateLikelihood = 1.0 / m_resumptionBBs.size(); + for (size_t i = 0; i < m_resumptionBBs.size(); i++) + { + swtDesc->bbsDstTab[i] = m_comp->fgAddRefPred(m_resumptionBBs[i], switchBB); + swtDesc->bbsDstTab[i]->setLikelihood(stateLikelihood); + } + + switchBB->SetSwitch(swtDesc); + } + + newEntryBB->SetCond(resumingEdge, newEntryBB->GetTargetEdge()); + resumingEdge->setLikelihood(0); + newEntryBB->GetFalseEdge()->setLikelihood(1); + + if (m_comp->doesMethodHavePatchpoints()) + { + JITDUMP(" Method has patch points...\n"); + // If we have patchpoints then first check if we need to resume in the OSR version. + BasicBlock* callHelperBB = m_comp->fgNewBBafter(BBJ_THROW, m_comp->fgLastBBInMainFunction(), false); + callHelperBB->bbSetRunRarely(); + callHelperBB->clearTryIndex(); + callHelperBB->clearHndIndex(); + + JITDUMP(" Created " FMT_BB " for transitions back into OSR method\n", callHelperBB->bbNum); + + BasicBlock* onContinuationBB = newEntryBB->GetTrueTarget(); + BasicBlock* checkILOffsetBB = m_comp->fgNewBBbefore(BBJ_COND, onContinuationBB, true); + + JITDUMP(" Created " FMT_BB " to check whether we should transition immediately to OSR\n", + checkILOffsetBB->bbNum); + + // Redirect newEntryBB -> onContinuationBB into newEntryBB -> checkILOffsetBB -> onContinuationBB + m_comp->fgRemoveRefPred(newEntryBB->GetTrueEdge()); + + FlowEdge* toCheckILOffsetBB = m_comp->fgAddRefPred(checkILOffsetBB, newEntryBB); + newEntryBB->SetTrueEdge(toCheckILOffsetBB); + toCheckILOffsetBB->setLikelihood(0); + checkILOffsetBB->inheritWeightPercentage(newEntryBB, 0); + + FlowEdge* toOnContinuationBB = m_comp->fgAddRefPred(onContinuationBB, checkILOffsetBB); + FlowEdge* toCallHelperBB = 
m_comp->fgAddRefPred(callHelperBB, checkILOffsetBB); + checkILOffsetBB->SetCond(toCallHelperBB, toOnContinuationBB); + toCallHelperBB->setLikelihood(0); + toOnContinuationBB->setLikelihood(1); + callHelperBB->inheritWeightPercentage(checkILOffsetBB, 0); + + // We need to dispatch to the OSR version if the IL offset is non-negative. + continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned offsetOfData = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataArr = LoadFromOffset(continuationArg, offsetOfData, TYP_REF); + unsigned offsetOfIlOffset = OFFSETOF__CORINFO_Array__data; + GenTree* ilOffset = LoadFromOffset(dataArr, offsetOfIlOffset, TYP_INT); + unsigned ilOffsetLclNum = m_comp->lvaGrabTemp(false DEBUGARG("IL offset for tier0 OSR method")); + m_comp->lvaGetDesc(ilOffsetLclNum)->lvType = TYP_INT; + GenTree* storeIlOffset = m_comp->gtNewStoreLclVarNode(ilOffsetLclNum, ilOffset); + LIR::AsRange(checkILOffsetBB).InsertAtEnd(LIR::SeqTree(m_comp, storeIlOffset)); + + ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT); + GenTree* zero = m_comp->gtNewIconNode(0); + GenTree* geZero = m_comp->gtNewOperNode(GT_GE, TYP_INT, ilOffset, zero); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, geZero); + LIR::AsRange(checkILOffsetBB).InsertAtEnd(ilOffset, zero, geZero, jtrue); + + ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT); + GenTreeCall* callHelper = m_comp->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffset); + callHelper->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; + + m_comp->compCurBB = callHelperBB; + m_comp->fgMorphTree(callHelper); + + LIR::AsRange(callHelperBB).InsertAtEnd(LIR::SeqTree(m_comp, callHelper)); + } + else if (m_comp->opts.IsOSR()) + { + JITDUMP(" Method is an OSR function\n"); + // If the tier-0 version resumed and then transitioned to the OSR + // version by normal means then we will see a non-zero continuation + // here 
that belongs to the tier0 method. In that case we should just + // ignore it, so create a BB that jumps back. + BasicBlock* onContinuationBB = newEntryBB->GetTrueTarget(); + BasicBlock* onNoContinuationBB = newEntryBB->GetFalseTarget(); + BasicBlock* checkILOffsetBB = m_comp->fgNewBBbefore(BBJ_COND, onContinuationBB, true); + + // Switch newEntryBB -> onContinuationBB into newEntryBB -> checkILOffsetBB + m_comp->fgRemoveRefPred(newEntryBB->GetTrueEdge()); + FlowEdge* toCheckILOffset = m_comp->fgAddRefPred(checkILOffsetBB, newEntryBB); + newEntryBB->SetTrueEdge(toCheckILOffset); + toCheckILOffset->setLikelihood(0); + checkILOffsetBB->inheritWeightPercentage(newEntryBB, 0); + + // Make checkILOffsetBB ->(true) onNoContinuationBB + // ->(false) onContinuationBB + + FlowEdge* toOnContinuationBB = m_comp->fgAddRefPred(onContinuationBB, checkILOffsetBB); + FlowEdge* toOnNoContinuationBB = m_comp->fgAddRefPred(onNoContinuationBB, checkILOffsetBB); + checkILOffsetBB->SetCond(toOnNoContinuationBB, toOnContinuationBB); + toOnContinuationBB->setLikelihood(0); + toOnNoContinuationBB->setLikelihood(1); + + JITDUMP(" Created " FMT_BB " to check for Tier-0 continuations\n", checkILOffsetBB->bbNum); + + continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF); + unsigned offsetOfData = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataArr = LoadFromOffset(continuationArg, offsetOfData, TYP_REF); + unsigned offsetOfIlOffset = OFFSETOF__CORINFO_Array__data; + GenTree* ilOffset = LoadFromOffset(dataArr, offsetOfIlOffset, TYP_INT); + GenTree* zero = m_comp->gtNewIconNode(0); + GenTree* ltZero = m_comp->gtNewOperNode(GT_LT, TYP_INT, ilOffset, zero); + GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, ltZero); + LIR::AsRange(checkILOffsetBB).InsertAtEnd(LIR::SeqTree(m_comp, jtrue)); + } +} diff --git a/src/coreclr/jit/async.h b/src/coreclr/jit/async.h new file mode 100644 index 00000000000000..63e1db0a636ed0 
--- /dev/null +++ b/src/coreclr/jit/async.h @@ -0,0 +1,152 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +struct LiveLocalInfo +{ + unsigned LclNum; + unsigned Alignment; + unsigned DataOffset; + unsigned DataSize; + unsigned GCDataIndex; + unsigned GCDataCount; + + explicit LiveLocalInfo(unsigned lclNum) + : LclNum(lclNum) + { + } +}; + +struct ContinuationLayout +{ + unsigned DataSize = 0; + unsigned GCRefsCount = 0; + ClassLayout* ReturnStructLayout = nullptr; + unsigned ReturnSize = 0; + bool ReturnInGCData = false; + unsigned ReturnValDataOffset = UINT_MAX; + unsigned ExceptionGCDataIndex = UINT_MAX; + const jitstd::vector& Locals; + + explicit ContinuationLayout(const jitstd::vector& locals) + : Locals(locals) + { + } +}; + +struct CallDefinitionInfo +{ + GenTreeLclVarCommon* DefinitionNode = nullptr; + + // Where to insert new IR for suspension checks. + GenTree* InsertAfter = nullptr; +}; + +class AsyncTransformation +{ + friend class AsyncLiveness; + + Compiler* m_comp; + jitstd::vector m_liveLocalsScratch; + CORINFO_ASYNC_INFO m_asyncInfo; + jitstd::vector m_resumptionBBs; + CORINFO_METHOD_HANDLE m_resumeStub = NO_METHOD_HANDLE; + CORINFO_CONST_LOOKUP m_resumeStubLookup; + unsigned m_returnedContinuationVar = BAD_VAR_NUM; + unsigned m_newContinuationVar = BAD_VAR_NUM; + unsigned m_dataArrayVar = BAD_VAR_NUM; + unsigned m_gcDataArrayVar = BAD_VAR_NUM; + unsigned m_resultBaseVar = BAD_VAR_NUM; + unsigned m_exceptionVar = BAD_VAR_NUM; + BasicBlock* m_lastSuspensionBB = nullptr; + BasicBlock* m_lastResumptionBB = nullptr; + BasicBlock* m_sharedReturnBB = nullptr; + + bool IsLive(unsigned lclNum); + void Transform(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& defs, + class AsyncLiveness& life, + BasicBlock** remainder); + + void CreateLiveSetForSuspension(BasicBlock* block, + GenTreeCall* call, + const jitstd::vector& defs, + AsyncLiveness& life, + 
jitstd::vector& liveLocals); + + void LiftLIREdges(BasicBlock* block, + const jitstd::vector& defs, + jitstd::vector& liveLocals); + + ContinuationLayout LayOutContinuation(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& liveLocals); + + CallDefinitionInfo CanonicalizeCallDefinition(BasicBlock* block, GenTreeCall* call, AsyncLiveness& life); + + BasicBlock* CreateSuspension(BasicBlock* block, + unsigned stateNum, + AsyncLiveness& life, + const ContinuationLayout& layout); + GenTreeCall* CreateAllocContinuationCall(AsyncLiveness& life, + GenTree* prevContinuation, + unsigned gcRefsCount, + unsigned int dataSize); + void FillInGCPointersOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB); + void FillInDataOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB); + void CreateCheckAndSuspendAfterCall(BasicBlock* block, + const CallDefinitionInfo& callDefInfo, + AsyncLiveness& life, + BasicBlock* suspendBB, + BasicBlock** remainder); + + BasicBlock* CreateResumption(BasicBlock* block, + BasicBlock* remainder, + GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned stateNum, + const ContinuationLayout& layout); + void RestoreFromDataOnResumption(unsigned resumeByteArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB); + void RestoreFromGCPointersOnResumption(unsigned resumeObjectArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB); + BasicBlock* RethrowExceptionOnResumption(BasicBlock* block, + BasicBlock* remainder, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* resumeBB); + void CopyReturnValueOnResumption(GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned resumeByteArrLclNum, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* storeResultBB); + + GenTreeIndir* LoadFromOffset(GenTree* base, + unsigned offset, + var_types type, + GenTreeFlags indirFlags = GTF_IND_NONFAULTING); + 
GenTreeStoreInd* StoreAtOffset(GenTree* base, unsigned offset, GenTree* value, var_types storeType); + + unsigned GetDataArrayVar(); + unsigned GetGCDataArrayVar(); + unsigned GetResultBaseVar(); + unsigned GetExceptionVar(); + + GenTree* CreateResumptionStubAddrTree(); + GenTree* CreateFunctionTargetAddr(CORINFO_METHOD_HANDLE methHnd, const CORINFO_CONST_LOOKUP& lookup); + + void CreateResumptionSwitch(); + +public: + AsyncTransformation(Compiler* comp) + : m_comp(comp) + , m_liveLocalsScratch(comp->getAllocator(CMK_Async)) + , m_resumptionBBs(comp->getAllocator(CMK_Async)) + { + } + + PhaseStatus Run(); +}; diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index d0a6833400faa1..cbbf8249c669dc 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -527,6 +527,7 @@ void BasicBlock::dspFlags() const {BBF_HAS_ALIGN, "has-align"}, {BBF_HAS_MDARRAYREF, "mdarr"}, {BBF_NEEDS_GCPOLL, "gcpoll"}, + {BBF_ASYNC_RESUMPTION, "resume"}, }; bool first = true; diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 35595928fc7a5c..83754de7d7bef0 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -463,6 +463,7 @@ enum BasicBlockFlags : uint64_t BBF_HAS_VALUE_PROFILE = MAKE_BBFLAG(38), // Block has a node that needs a value probing BBF_HAS_NEWARR = MAKE_BBFLAG(39), // BB contains 'new' of an array type. BBF_MAY_HAVE_BOUNDS_CHECKS = MAKE_BBFLAG(40), // BB *likely* has a bounds check (after rangecheck phase). + BBF_ASYNC_RESUMPTION = MAKE_BBFLAG(41), // Block is a resumption block in an async method // The following are sets of flags. @@ -480,7 +481,7 @@ enum BasicBlockFlags : uint64_t // For example, the top block might or might not have BBF_GC_SAFE_POINT, // but we assume it does not have BBF_GC_SAFE_POINT any more. 
- BBF_SPLIT_LOST = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_RECURSIVE_TAILCALL, + BBF_SPLIT_LOST = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_RECURSIVE_TAILCALL | BBF_ASYNC_RESUMPTION, // Flags gained by the bottom block when a block is split. // Note, this is a conservative guess. @@ -488,7 +489,7 @@ enum BasicBlockFlags : uint64_t // TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ? BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_PROF_WEIGHT | BBF_HAS_NEWARR | \ - BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL | BBF_MAY_HAVE_BOUNDS_CHECKS, + BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL | BBF_MAY_HAVE_BOUNDS_CHECKS | BBF_ASYNC_RESUMPTION, // Flags that must be propagated to a new block if code is copied from a block to a new block. These are flags that // limit processing of a block if the code in question doesn't exist. This is conservative; we might not @@ -1411,6 +1412,16 @@ struct BasicBlock : private LIR::Range m_firstNode = tree; } + GenTree* GetLastLIRNode() const + { + return m_lastNode; + } + + void SetLastLIRNode(GenTree* tree) + { + m_lastNode = tree; + } + EntryState* bbEntryState; // verifier tracked state of all entries in stack. 
#define NO_BASE_TMP UINT_MAX // base# to use when we have none diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index aa11e1fa450a33..1ba14dd4394a5e 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1214,6 +1214,7 @@ class CodeGen final : public CodeGenInterface #ifdef SWIFT_SUPPORT void genCodeForSwiftErrorReg(GenTree* tree); #endif // SWIFT_SUPPORT + void genCodeForAsyncContinuation(GenTree* tree); void genCodeForNullCheck(GenTreeIndir* tree); void genCodeForCmpXchg(GenTreeCmpXchg* tree); void genCodeForReuseVal(GenTree* treeNode); @@ -1346,6 +1347,8 @@ class CodeGen final : public CodeGenInterface #endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 void genReturn(GenTree* treeNode); + void genReturnSuspend(GenTreeUnOp* treeNode); + void genMarkReturnGCInfo(); #ifdef SWIFT_SUPPORT void genSwiftErrorReturn(GenTree* treeNode); diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index cd5a36efb0b408..30eab87b3b4b60 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -2081,7 +2081,16 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC // We can't do this optimization with callee saved floating point registers because // the stack would be allocated in a wrong spot. if (maskCalleeSavedFloat != RBM_NONE) + { + return RBM_NONE; + } + + // We similarly skip it for async due to the extra async continuation + // return that may be overridden by the pop. + if (compiler->compIsAsync()) + { return RBM_NONE; + } // Allocate space for small frames by pushing extra registers. It generates smaller and faster code // that extra sub sp,XXX/add sp,XXX. 
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 965dcb59d50536..7969d33a14c9b7 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -295,6 +295,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; #endif // SWIFT_SUPPORT + case GT_RETURN_SUSPEND: + genReturnSuspend(treeNode->AsUnOp()); + break; + case GT_LEA: // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. genLeaInstruction(treeNode->AsAddrMode()); @@ -510,6 +514,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genConsumeReg(treeNode); break; + case GT_ASYNC_CONTINUATION: + genCodeForAsyncContinuation(treeNode); + break; + case GT_PINVOKE_PROLOG: noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask(compiler->info.compCallConv)) == 0); @@ -3489,7 +3497,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(params.secondRetSize != EA_BYREF); #endif - params.isJump = call->IsFastTailCall(); + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. 
@@ -3537,6 +3546,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } #endif // DEBUG + GenTree* target = getCallTarget(call, ¶ms.methHnd); if (target != nullptr) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 98ccc936694bad..41cd11548d5d67 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1953,6 +1953,11 @@ void CodeGen::genGenerateMachineCode() printf("; OSR variant for entry point 0x%x\n", compiler->info.compILEntry); } + if (compiler->compIsAsync()) + { + printf("; async\n"); + } + if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT) { printf("; optimized code\n"); @@ -7253,23 +7258,14 @@ void CodeGen::genReturn(GenTree* treeNode) } } - if (treeNode->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) + if (treeNode->OperIs(GT_RETURN) && compiler->compIsAsync()) { - const ReturnTypeDesc& retTypeDesc = compiler->compRetTypeDesc; + instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_ASYNC_CONTINUATION_RET); + } - if (compiler->compMethodReturnsRetBufAddr()) - { - gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); - } - else - { - unsigned retRegCount = retTypeDesc.GetReturnRegCount(); - for (unsigned i = 0; i < retRegCount; ++i) - { - gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv), - retTypeDesc.GetReturnRegType(i)); - } - } + if (treeNode->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) + { + genMarkReturnGCInfo(); } #ifdef PROFILING_SUPPORTED @@ -7336,6 +7332,83 @@ void CodeGen::genSwiftErrorReturn(GenTree* treeNode) } #endif // SWIFT_SUPPORT +//------------------------------------------------------------------------ +// genReturnSuspend: +// Generate code for a GT_RETURN_SUSPEND node +// +// Arguments: +// treeNode - The node +// +void CodeGen::genReturnSuspend(GenTreeUnOp* treeNode) +{ + GenTree* op = treeNode->gtGetOp1(); + assert(op->TypeIs(TYP_REF)); + + regNumber reg = genConsumeReg(op); + inst_Mov(TYP_REF, REG_ASYNC_CONTINUATION_RET, reg, /* canSkip */ 
true); + + ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc; + unsigned numRetRegs = retTypeDesc.GetReturnRegCount(); + for (unsigned i = 0; i < numRetRegs; i++) + { + if (varTypeIsGC(retTypeDesc.GetReturnRegType(i))) + { + regNumber returnReg = retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv); + instGen_Set_Reg_To_Zero(EA_PTRSIZE, returnReg); + } + } + + genMarkReturnGCInfo(); +} + +//------------------------------------------------------------------------ +// genMarkReturnGCInfo: +// Mark GC and non-GC pointers of return registers going into the epilog.. +// +void CodeGen::genMarkReturnGCInfo() +{ + const ReturnTypeDesc& retTypeDesc = compiler->compRetTypeDesc; + + if (compiler->compMethodReturnsRetBufAddr()) + { + gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); + } + else + { + unsigned retRegCount = retTypeDesc.GetReturnRegCount(); + for (unsigned i = 0; i < retRegCount; ++i) + { + gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv), + retTypeDesc.GetReturnRegType(i)); + } + } + + if (compiler->compIsAsync()) + { + gcInfo.gcMarkRegPtrVal(REG_ASYNC_CONTINUATION_RET, TYP_REF); + } +} + +//------------------------------------------------------------------------ +// genCodeForAsyncContinuation: +// Generate code for a GT_ASYNC_CONTINUATION node. +// +// Arguments: +// tree - The node +// +void CodeGen::genCodeForAsyncContinuation(GenTree* tree) +{ + assert(tree->OperIs(GT_ASYNC_CONTINUATION)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + inst_Mov(targetType, targetReg, REG_ASYNC_CONTINUATION_RET, /* canSkip */ true); + genTransferRegGCState(targetReg, REG_ASYNC_CONTINUATION_RET); + + genProduceReg(tree); +} + //------------------------------------------------------------------------ // isStructReturn: Returns whether the 'treeNode' is returning a struct. 
// diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 4b18015182c1ac..d7d0ff2d7c3be9 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -503,8 +503,13 @@ void CodeGen::genCodeForBBlist() } } + if (compiler->compIsAsync()) + { + nonVarPtrRegs &= ~RBM_ASYNC_CONTINUATION_RET; + } + // For a tailcall arbitrary argument registers may be live into the - // prolog. Skip validating those. + // epilog. Skip validating those. if (block->HasFlag(BBF_HAS_JMP)) { nonVarPtrRegs &= ~fullIntArgRegMask(CorInfoCallConvExtension::Managed); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 665188ecd40616..762fb0082b14ff 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6017,7 +6017,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } - params.isJump = call->IsFastTailCall(); + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 9fe50675af2bae..b7a6ee9516c937 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6039,7 +6039,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } - params.isJump = call->IsFastTailCall(); + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. 
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index dfa8d5b1545ccc..ba2d3c5f053928 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2056,6 +2056,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; #endif // SWIFT_SUPPORT + case GT_RETURN_SUSPEND: + genReturnSuspend(treeNode->AsUnOp()); + break; + case GT_LEA: // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. genLeaInstruction(treeNode->AsAddrMode()); @@ -2240,6 +2244,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genConsumeReg(treeNode); break; + case GT_ASYNC_CONTINUATION: + genCodeForAsyncContinuation(treeNode); + break; + #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: { @@ -6340,7 +6348,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA } } - params.isJump = call->IsFastTailCall(); + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); // We need to propagate the IL offset information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. @@ -10571,10 +10580,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) if (frameSize > 0) { #ifdef TARGET_X86 - /* Add 'compiler->compLclFrameSize' to ESP */ - /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */ + // Add 'compiler->compLclFrameSize' to ESP. Use "pop ECX" for that, except in cases + // where ECX may contain some state. 
- if ((frameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed) + if ((frameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed && !compiler->compIsAsync()) { inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); regSet.verifyRegUsed(REG_ECX); @@ -10582,8 +10591,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) else #endif // TARGET_X86 { - /* Add 'compiler->compLclFrameSize' to ESP */ - /* Generate "add esp, " */ + // Add 'compiler->compLclFrameSize' to ESP + // Generate "add esp, " inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE); } } @@ -10661,7 +10670,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) // do nothing before popping the callee-saved registers } #ifdef TARGET_X86 - else if (compiler->compLclFrameSize == REGSIZE_BYTES) + else if ((compiler->compLclFrameSize == REGSIZE_BYTES) && !compiler->compJmpOpUsed && + !compiler->compIsAsync()) { // "pop ecx" will make ESP point to the callee-saved registers inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1ea2e0a4c6be3d..52721a6ee0ff10 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -488,8 +488,9 @@ Compiler::Compiler(ArenaAllocator* arena, info.compILCodeSize = methodInfo->ILCodeSize; info.compILImportSize = 0; - info.compHasNextCallRetAddr = false; - info.compIsVarArgs = false; + info.compHasNextCallRetAddr = false; + info.compIsVarArgs = false; + info.compUsesAsyncContinuation = false; } //------------------------------------------------------------------------ @@ -3159,6 +3160,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags) { printf("OPTIONS: Jit invoked for AOT\n"); } + + if (compIsAsync()) + { + printf("OPTIONS: compilation is an async state machine\n"); + } } #endif @@ -5006,6 +5012,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl } #endif // TARGET_ARM + if (compIsAsync()) + { + DoPhase(this, PHASE_ASYNC, &Compiler::TransformAsync); + } + // Assign 
registers to variables, etc. // Create LinearScan before Lowering, so that Lowering can call LinearScan methods diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f40de4429071a7..2419e099502306 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -683,6 +683,15 @@ class LclVarDsc unsigned char lvAllDefsAreNoGc : 1; // For pinned locals: true if all defs of this local are no-gc unsigned char lvStackAllocatedObject : 1; // Local is a stack allocated object (class, box, array, ...) + bool IsImplicitByRef() + { +#if FEATURE_IMPLICIT_BYREFS + return lvIsImplicitByRef; +#else + return false; +#endif + } + // lvIsMultiRegArgOrRet() // returns true if this is a multireg LclVar struct used in an argument context // or if this is a multireg LclVar struct assigned from a multireg call @@ -3067,7 +3076,7 @@ class Compiler GenTreeCall* gtNewIndCallNode(GenTree* addr, var_types type, const DebugInfo& di = DebugInfo()); GenTreeCall* gtNewHelperCallNode( - unsigned helper, var_types type, GenTree* arg1 = nullptr, GenTree* arg2 = nullptr, GenTree* arg3 = nullptr); + unsigned helper, var_types type, GenTree* arg1 = nullptr, GenTree* arg2 = nullptr, GenTree* arg3 = nullptr, GenTree* arg4 = nullptr); GenTreeCall* gtNewRuntimeLookupHelperCallNode(CORINFO_RUNTIME_LOOKUP* pRuntimeLookup, GenTree* ctxTree, @@ -3990,6 +3999,9 @@ class Compiler unsigned lvaSwiftErrorLocal; #endif + // Variable representing async continuation argument passed. + unsigned lvaAsyncContinuationArg = BAD_VAR_NUM; + #if defined(DEBUG) && defined(TARGET_XARCH) unsigned lvaReturnSpCheck = BAD_VAR_NUM; // Stores SP to confirm it is not corrupted on return. 
@@ -4101,6 +4113,7 @@ class Compiler void lvaInitUserArgs(unsigned* curVarNum, unsigned skipArgs, unsigned takeArgs); void lvaInitGenericsCtxt(unsigned* curVarNum); void lvaInitVarArgsHandle(unsigned* curVarNum); + void lvaInitAsyncContinuation(unsigned* curVarNum); void lvaInitVarDsc(LclVarDsc* varDsc, unsigned varNum, @@ -4843,6 +4856,8 @@ class Compiler bool impMatchIsInstBooleanConversion(const BYTE* codeAddr, const BYTE* codeEndp, int* consumed); + bool impMatchAwaitPattern(const BYTE * codeAddr, const BYTE * codeEndp, int* configVal); + GenTree* impCastClassOrIsInstToTree( GenTree* op1, GenTree* op2, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool isCastClass, bool* booleanCheck, IL_OFFSET ilOffset); @@ -5520,6 +5535,8 @@ class Compiler PhaseStatus placeLoopAlignInstructions(); #endif + PhaseStatus TransformAsync(); + // This field keep the R2R helper call that would be inserted to trigger the constructor // of the static class. It is set as nongc or gc static base if they are imported, so // CSE can eliminate the repeated call, or the chepeast helper function that triggers it. 
@@ -6843,7 +6860,7 @@ class Compiler void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result, InlineContext** createdContext); void fgInsertInlineeBlocks(InlineInfo* pInlineInfo); - void fgInsertInlineeArgument(const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI); + void fgInsertInlineeArgument(InlineInfo* pInlineInfo, const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI); Statement* fgInlinePrependStatements(InlineInfo* inlineInfo); void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, Statement* stmt); @@ -7178,6 +7195,7 @@ class Compiler void optPrintCSEDataFlowSet(EXPSET_VALARG_TP cseDataFlowSet, bool includeBits = true); EXPSET_TP cseCallKillsMask; // Computed once - A mask that is used to kill available CSEs at callsites + EXPSET_TP cseAsyncKillsMask; // Computed once - A mask that is used to kill available BYREF CSEs at async suspension points static const size_t s_optCSEhashSizeInitial; static const size_t s_optCSEhashGrowthFactor; @@ -7266,6 +7284,7 @@ class Compiler unsigned optValnumCSE_Index(GenTree* tree, Statement* stmt); bool optValnumCSE_Locate(CSE_HeuristicCommon* heuristic); void optValnumCSE_InitDataFlow(); + void optValnumCSE_SetUpAsyncByrefKills(); void optValnumCSE_DataFlow(); void optValnumCSE_Availability(); void optValnumCSE_Heuristic(CSE_HeuristicCommon* heuristic); @@ -10731,12 +10750,13 @@ class Compiler // (2) the code is hot/cold split, and we issued less code than we expected // in the cold section (the hot section will always be padded out to compTotalHotCodeSize). - bool compIsStatic : 1; // Is the method static (no 'this' pointer)? - bool compIsVarArgs : 1; // Does the method have varargs parameters? - bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options? 
- bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback - bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic - bool compHasNextCallRetAddr : 1; // The NextCallReturnAddress intrinsic is used. + bool compIsStatic : 1; // Is the method static (no 'this' pointer)? + bool compIsVarArgs : 1; // Does the method have varargs parameters? + bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options? + bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback + bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic + bool compHasNextCallRetAddr : 1; // The NextCallReturnAddress intrinsic is used. + bool compUsesAsyncContinuation : 1; // The AsyncCallContinuation intrinsic is used. var_types compRetType; // Return type of the method as declared in IL (including SIMD normalization) var_types compRetNativeType; // Normalized return type as per target arch ABI @@ -10869,6 +10889,11 @@ class Compiler #endif // TARGET_AMD64 } + bool compIsAsync() const + { + return opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ASYNC); + } + //------------------------------------------------------------------------ // compMethodReturnsMultiRegRetType: Does this method return a multi-reg value? // @@ -10893,6 +10918,13 @@ class Compiler bool compObjectStackAllocation() { + if (compIsAsync()) + { + // Object stack allocation takes the address of locals around + // suspension points. Disable entirely for now. 
+ return false; + } + return (JitConfig.JitObjectStackAllocation() != 0); } @@ -11848,6 +11880,7 @@ class GenTreeVisitor // Leaf nodes case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -11924,6 +11957,7 @@ class GenTreeVisitor case GT_RETURNTRAP: case GT_FIELD_ADDR: case GT_RETURN: + case GT_RETURN_SUSPEND: case GT_RETFILT: case GT_RUNTIMELOOKUP: case GT_ARR_ADDR: diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index a1c563f0ce0ea6..a8d19927838a4d 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1552,7 +1552,7 @@ inline GenTree* Compiler::gtNewIconEmbFldHndNode(CORINFO_FIELD_HANDLE fldHnd) // New CT_HELPER node // inline GenTreeCall* Compiler::gtNewHelperCallNode( - unsigned helper, var_types type, GenTree* arg1, GenTree* arg2, GenTree* arg3) + unsigned helper, var_types type, GenTree* arg1, GenTree* arg2, GenTree* arg3, GenTree* arg4) { GenTreeCall* const result = gtNewCallNode(CT_HELPER, eeFindHelper(helper), type); @@ -1571,6 +1571,12 @@ inline GenTreeCall* Compiler::gtNewHelperCallNode( result->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER; #endif + if (arg4 != nullptr) + { + result->gtArgs.PushFront(this, NewCallArg::Primitive(arg4)); + result->gtFlags |= arg4->gtFlags & GTF_ALL_EFFECT; + } + if (arg3 != nullptr) { result->gtArgs.PushFront(this, NewCallArg::Primitive(arg3)); @@ -2921,6 +2927,12 @@ inline unsigned Compiler::compMapILargNum(unsigned ILargNum) assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted. } + if (ILargNum >= lvaAsyncContinuationArg) + { + ILargNum++; + assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted. 
+ } + if (ILargNum >= lvaVarargsHandleArg) { ILargNum++; @@ -4374,6 +4386,7 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -4454,6 +4467,7 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_RETURNTRAP: case GT_KEEPALIVE: case GT_INC_SATURATE: + case GT_RETURN_SUSPEND: visitor(this->AsUnOp()->gtOp1); return; diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index 8b3f84a0cf3a45..eb2c0dffc0ee6e 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -66,6 +66,7 @@ CompMemKindMacro(ZeroInit) CompMemKindMacro(Pgo) CompMemKindMacro(MaskConversionOpt) CompMemKindMacro(TryRegionClone) +CompMemKindMacro(Async) CompMemKindMacro(RangeCheckCloning) //clang-format on diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 21915bf4a13a2f..2c816dc0ef9a43 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -119,6 +119,7 @@ CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", CompPhaseNameMacro(PHASE_REPAIR_PROFILE_POST_MORPH, "Repair profile post-morph", false, -1, false) CompPhaseNameMacro(PHASE_REPAIR_PROFILE_PRE_LAYOUT, "Repair profile pre-layout", false, -1, false) +CompPhaseNameMacro(PHASE_ASYNC, "Transform async", false, -1, true) CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", true, -1, false) CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", false, PHASE_LCLVARLIVENESS, false) CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK, "Per block local var liveness", false, PHASE_LCLVARLIVENESS, false) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index c317af15093715..b5c78a6b286e3c 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -3573,7 +3573,8 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, 
emitAttr retSizeIn - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)) + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet) { emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE; @@ -3597,7 +3598,8 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, (argCnt > ID_MAX_SMALL_CNS) || // too many args (argCnt < 0) // caller pops arguments // There is a second ref/byref return register. - MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize))) + MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)) || + hasAsyncRet) { instrDescCGCA* id; @@ -3614,6 +3616,7 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, #if MULTIREG_HAS_SECOND_GC_RET emitSetSecondRetRegGCType(id, secondRetSize); #endif // MULTIREG_HAS_SECOND_GC_RET + id->hasAsyncContinuationRet(hasAsyncRet); return id; } @@ -3657,7 +3660,8 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSizeIn - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)) + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet) { emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE; @@ -3677,7 +3681,8 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, (argCnt > ID_MAX_SMALL_CNS) || // too many args (argCnt < 0) // caller pops arguments // There is a second ref/byref return register. 
- MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize))) + MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)) || + hasAsyncRet) { instrDescCGCA* id = emitAllocInstrCGCA(retSize); @@ -3694,6 +3699,7 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, #if MULTIREG_HAS_SECOND_GC_RET emitSetSecondRetRegGCType(id, secondRetSize); #endif // MULTIREG_HAS_SECOND_GC_RET + id->hasAsyncContinuationRet(hasAsyncRet); return id; } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 80ac961a3a6d58..3afb0759cf54fc 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -473,6 +473,7 @@ struct EmitCallParams emitAttr retSize = EA_PTRSIZE; // For multi-reg args with GC returns in the second arg emitAttr secondRetSize = EA_UNKNOWN; + bool hasAsyncRet = false; BitVec ptrVars = BitVecOps::UninitVal(); regMaskTP gcrefRegs = RBM_NONE; regMaskTP byrefRegs = RBM_NONE; @@ -2285,8 +2286,19 @@ class emitter { _idcSecondRetRegGCType = gctype; } +#endif + + bool hasAsyncContinuationRet() const + { + return _hasAsyncContinuationRet; + } + void hasAsyncContinuationRet(bool value) + { + _hasAsyncContinuationRet = value; + } private: +#if MULTIREG_HAS_SECOND_GC_RET // This member stores the GC-ness of the second register in a 2 register returned struct on System V. // It is added to the call struct since it is not needed by the base instrDesc struct, which keeps GC-ness // of the first register for the instCall nodes. @@ -2296,6 +2308,7 @@ class emitter // The base struct's member keeping the GC-ness of the first return register is _idGCref. GCtype _idcSecondRetRegGCType : 2; // ... GC type for the second return register. #endif // MULTIREG_HAS_SECOND_GC_RET + bool _hasAsyncContinuationRet : 1; }; // TODO-Cleanup: Uses of stack-allocated instrDescs should be refactored to be unnecessary. 
diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index 68a02be3e19c77..c2656322b49701 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -4725,7 +4725,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) { /* Indirect call, virtual calls */ - id = emitNewInstrCallInd(argCnt, 0 /* disp */, params.ptrVars, gcrefRegs, byrefRegs, params.retSize); + id = emitNewInstrCallInd(argCnt, 0 /* disp */, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, + params.hasAsyncRet); } else { @@ -4734,7 +4735,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -6531,6 +6532,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) else if (id->idGCref() == GCT_BYREF) byrefRegs |= RBM_R0; + if (id->idIsLargeCall() && ((instrDescCGCA*)id)->hasAsyncContinuationRet()) + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + // If the GC register set has changed, report the new set. 
if (gcrefRegs != emitThisGCrefRegs) emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); diff --git a/src/coreclr/jit/emitarm.h b/src/coreclr/jit/emitarm.h index 829955ba25c8a1..6e3eb5793c54aa 100644 --- a/src/coreclr/jit/emitarm.h +++ b/src/coreclr/jit/emitarm.h @@ -65,10 +65,15 @@ void emitDispInsHelp(instrDesc* id, private: instrDesc* emitNewInstrCallDir( - int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize); - -instrDesc* emitNewInstrCallInd( - int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize); + int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, bool hasAsyncRet); + +instrDesc* emitNewInstrCallInd(int argCnt, + ssize_t disp, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize, + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 4a82aac23ecdf5..915b496a950559 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9155,7 +9155,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) /* Indirect call, virtual calls */ id = emitNewInstrCallInd(argCnt, 0 /* disp */, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, - params.secondRetSize); + params.secondRetSize, params.hasAsyncRet); } else { @@ -9164,7 +9164,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -10894,6 +10895,10 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* 
dst, instrDesc* id, code_t { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 05a46ee000c200..c30ab5a57dec82 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -98,7 +98,8 @@ instrDesc* emitNewInstrCallDir(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, - emitAttr secondRetSize); + emitAttr secondRetSize, + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, @@ -106,7 +107,8 @@ instrDesc* emitNewInstrCallInd(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, - emitAttr secondRetSize); + emitAttr secondRetSize, + bool hasAsyncRet); /************************************************************************/ /* enum to allow instruction optimisation to specify register order */ diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 5e445fa165e18b..89a2060f0727f1 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2436,7 +2436,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_INDIR_R); id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, - params.secondRetSize); + params.secondRetSize, params.hasAsyncRet); } else { @@ -2445,7 +2445,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -2713,6 +2714,10 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, 
instrDesc* id, code_t { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 4cc730c51f1ae2..49f7bb702a7422 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -117,14 +117,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 9104fbc97d5ed4..9731c9bfc0f58f 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -1661,7 +1661,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_INDIR_R); id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, - params.secondRetSize); + params.secondRetSize, params.hasAsyncRet); } else { @@ -1670,7 +1670,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + 
params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -1983,6 +1984,10 @@ unsigned emitter::emitOutputCall(const insGroup* ig, BYTE* dst, instrDesc* id, c { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index 249ae35951cc37..cbb2b11ec96747 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -42,14 +42,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ef2594be83d116..ea32168c74bf9d 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -11165,7 +11165,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) /* Indirect call, virtual calls */ id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, - params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize)); + params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize), + params.hasAsyncRet); } else { @@ -11175,7 +11176,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN || params.callType 
== EC_FUNC_TOKEN_INDIR); id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, - params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize)); + params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize), + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -18577,11 +18579,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) byrefRegs |= RBM_EAX; } -#ifdef UNIX_AMD64_ABI // If is a multi-register return method is called, mark RDX appropriately (for System V AMD64). if (id->idIsLargeCall()) { instrDescCGCA* idCall = (instrDescCGCA*)id; +#ifdef UNIX_AMD64_ABI if (idCall->idSecondGCref() == GCT_GCREF) { gcrefRegs |= RBM_RDX; @@ -18590,8 +18592,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { byrefRegs |= RBM_RDX; } - } #endif // UNIX_AMD64_ABI + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } + } // If the GC register set has changed, report the new set if (gcrefRegs != emitThisGCrefRegs) diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 90112a316b2c96..ee5f4b0e9d6ea8 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -756,14 +756,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); void emitGetInsCns(const instrDesc* id, CnsVal* cv) const; ssize_t emitGetInsAmdCns(const instrDesc* id, CnsVal* cv) const; diff --git 
a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 9905526766a9cf..8e1e2199253776 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -2578,7 +2578,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed // change. The original this (info.compThisArg) then remains // unmodified in the method. fgAddInternal is responsible for // adding the code to copy the initial this into the temp. - +// void Compiler::fgAdjustForAddressExposedOrWrittenThis() { LclVarDsc* thisVarDsc = lvaGetDesc(info.compThisArg); @@ -4872,7 +4872,9 @@ BasicBlock* Compiler::fgSplitBlockAtBeginning(BasicBlock* curr) if (curr->IsLIR()) { newBlock->SetFirstLIRNode(curr->GetFirstLIRNode()); + newBlock->SetLastLIRNode(curr->GetLastLIRNode()); curr->SetFirstLIRNode(nullptr); + curr->SetLastLIRNode(nullptr); } else { diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 5410d946e97840..3a00afe819eb99 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -2706,6 +2706,14 @@ bool BBPredsChecker::CheckEhTryDsc(BasicBlock* block, BasicBlock* blockPred, EHb return true; } + // Async resumptions are allowed to jump into try blocks at any point. They + // are introduced late enough that the invariant of single entry is no + // longer necessary. + if (blockPred->HasFlag(BBF_ASYNC_RESUMPTION)) + { + return true; + } + printf("Jump into the middle of try region: " FMT_BB " branches to " FMT_BB "\n", blockPred->bbNum, block->bbNum); assert(!"Jump into middle of try region"); return false; @@ -3102,7 +3110,8 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef // A branch or fall-through to a BBJ_CALLFINALLY block must come from the `try` region associated // with the finally block the BBJ_CALLFINALLY is targeting. 
There is one special case: if the // BBJ_CALLFINALLY is the first block of a `try`, then its predecessor can be outside the `try`: - // either a branch or fall-through to the first block. + // either a branch or fall-through to the first block. Similarly internal resumption blocks for + // async are allowed to do this as they are introduced late enough that we no longer need the invariant. // // Note that this IR condition is a choice. It naturally occurs when importing EH constructs. // This condition prevents flow optimizations from skipping blocks in a `try` and branching @@ -3140,7 +3149,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef } else { - assert(bbInTryRegions(finallyIndex, block)); + assert(bbInTryRegions(finallyIndex, block) || block->HasFlag(BBF_ASYNC_RESUMPTION)); } } } @@ -3357,6 +3366,7 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) break; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: expectedFlags |= GTF_ORDER_SIDEEFF; break; @@ -3629,6 +3639,10 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, Statement* stmt) // The root of the tree should have GTF_ORDER_SIDEEFF set noway_assert(stmt->GetRootNode()->gtFlags & GTF_ORDER_SIDEEFF); } + else if (tree->OperIs(GT_ASYNC_CONTINUATION)) + { + assert(tree->gtFlags & GTF_ORDER_SIDEEFF); + } } if (tree->OperIsUnary() && tree->AsOp()->gtOp1) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index e6be97f1d38e74..f9e8c4686018d1 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1072,6 +1072,14 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, return; } + if (call->gtIsAsyncCall && info.compUsesAsyncContinuation) + { + // Currently not supported. Could provide a nice perf benefit for + // Task -> runtime async thunks if we supported it. 
+ result->NoteFatal(InlineObservation::CALLER_ASYNC_USED_CONTINUATION); + return; + } + // impMarkInlineCandidate() is expected not to mark tail prefixed calls // and recursive tail calls as inline candidates. noway_assert(!call->IsTailPrefixedCall()); @@ -1994,8 +2002,12 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // newStmt - updated with the new statement // callDI - debug info for the call // -void Compiler::fgInsertInlineeArgument( - const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI) +void Compiler::fgInsertInlineeArgument(InlineInfo* inlineInfo, + const InlArgInfo& argInfo, + BasicBlock* block, + Statement** afterStmt, + Statement** newStmt, + const DebugInfo& callDI) { const bool argIsSingleDef = !argInfo.argHasLdargaOp && !argInfo.argHasStargOp; CallArg* arg = argInfo.arg; @@ -2229,6 +2241,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) switch (arg.GetWellKnownArg()) { case WellKnownArg::RetBuffer: + case WellKnownArg::AsyncContinuation: continue; case WellKnownArg::InstParam: argInfo = inlineInfo->inlInstParamArgInfo; @@ -2240,7 +2253,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) } assert(argInfo != nullptr); - fgInsertInlineeArgument(*argInfo, block, &afterStmt, &newStmt, callDI); + fgInsertInlineeArgument(inlineInfo, *argInfo, block, &afterStmt, &newStmt, callDI); } // Add the CCTOR check if asked for. 
diff --git a/src/coreclr/jit/fgstmt.cpp b/src/coreclr/jit/fgstmt.cpp index 85809339965ff0..f5ab387e262416 100644 --- a/src/coreclr/jit/fgstmt.cpp +++ b/src/coreclr/jit/fgstmt.cpp @@ -539,6 +539,7 @@ inline bool OperIsControlFlow(genTreeOps oper) case GT_RETURN: case GT_RETFILT: case GT_SWIFT_ERROR_RET: + case GT_RETURN_SUSPEND: #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: #endif // FEATURE_EH_WINDOWS_X86 diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 539a8ca7c7c4b6..2ea6f6560b972e 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2449,7 +2449,18 @@ PhaseStatus Compiler::fgAddInternal() } else { - merger.SetMaxReturns(MergedReturns::ReturnCountHardLimit); + unsigned limit = MergedReturns::ReturnCountHardLimit; +#ifdef JIT32_GCENCODER + // For the jit32 GC encoder the limit is an actual hard limit. In + // async functions we will be introducing another return during + // the async transformation, so make sure there's a free epilog + // for it. + if (compIsAsync()) + { + limit--; + } +#endif + merger.SetMaxReturns(limit); } } diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index c54e53d27bde49..4f004ed800c935 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -502,7 +502,7 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) // Can't substitute GT_LCLHEAP. // // Don't substitute a no return call (trips up morph in some cases). 
- if (fwdSubNode->OperIs(GT_CATCH_ARG, GT_LCLHEAP)) + if (fwdSubNode->OperIs(GT_CATCH_ARG, GT_LCLHEAP, GT_ASYNC_CONTINUATION)) { JITDUMP(" tree to sub is catch arg, or lcl heap\n"); return false; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5bf5fb170af2f1..9e2c1f21ef5e1a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1241,6 +1241,7 @@ CallArgs::CallArgs() #endif , m_hasThisPointer(false) , m_hasRetBuffer(false) + , m_hasAsyncContinuation(false) , m_isVarArgs(false) , m_abiInformationDetermined(false) , m_hasAddedFinalArgs(false) @@ -1519,6 +1520,9 @@ void CallArgs::AddedWellKnownArg(WellKnownArg arg) case WellKnownArg::RetBuffer: m_hasRetBuffer = true; break; + case WellKnownArg::AsyncContinuation: + m_hasAsyncContinuation = true; + break; default: break; } @@ -1542,6 +1546,10 @@ void CallArgs::RemovedWellKnownArg(WellKnownArg arg) assert(FindWellKnownArg(arg) == nullptr); m_hasRetBuffer = false; break; + case WellKnownArg::AsyncContinuation: + assert(FindWellKnownArg(arg) == nullptr); + m_hasAsyncContinuation = false; + break; default: break; } @@ -2247,6 +2255,36 @@ bool GenTreeCall::HasSideEffects(Compiler* compiler, bool ignoreExceptions, bool (!helperProperties.IsAllocator(helper) || ((gtCallMoreFlags & GTF_CALL_M_ALLOC_SIDE_EFFECTS) != 0)); } +//------------------------------------------------------------------------- +// IsAsync: Whether or not this call is to an async function. +// +// Return Value: +// True if so. +// +// Remarks: +// async involves passing an async continuation as a separate argument and +// returning an async continuation in REG_ASYNC_CONTINUATION_RET. +// +// The async continuation is usually JIT added +// (WellKnownArg::AsyncContinuation). This is the case for an async method +// calling another async method by normal means. However, the VM also creates +// stubs that call async methods through calli where the async continuations +// are passed explicitly. 
See CEEJitInfo::getAsyncResumptionStub and +// MethodDesc::EmitTaskReturningThunk for examples. In +// those cases the JIT does not know (and does not need to know) which arg is +// the async continuation. +// +// The VM also uses the StubHelpers.AsyncCallContinuation() intrinsic in the +// stubs discussed above. The JIT must take care in those cases to still mark +// the preceding call as an async call; this is required for correct LSRA +// behavior and GC reporting around the returned async continuation. This is +// currently done in lowering; see LowerAsyncContinuation(). +// +bool GenTreeCall::IsAsync() const +{ + return gtIsAsyncCall; +} + //------------------------------------------------------------------------- // HasNonStandardAddedArgs: Return true if the method has non-standard args added to the call // argument list during argument morphing (fgMorphArgs), e.g., passed in R10 or R11 on AMD64. @@ -6556,6 +6594,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -6621,6 +6660,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_RETURNTRAP: case GT_RETURN: case GT_RETFILT: + case GT_RETURN_SUSPEND: case GT_BSWAP: case GT_BSWAP16: case GT_KEEPALIVE: @@ -6916,10 +6956,10 @@ bool GenTree::OperRequiresCallFlag(Compiler* comp) const switch (gtOper) { case GT_CALL: - return true; - case GT_GCPOLL: case GT_KEEPALIVE: + case GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: return true; case GT_SWIFT_ERROR: @@ -7249,6 +7289,8 @@ bool GenTree::OperRequiresGlobRefFlag(Compiler* comp) const case GT_CMPXCHG: case GT_MEMORYBARRIER: case GT_KEEPALIVE: + case GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: case GT_SWIFT_ERROR: case GT_GCPOLL: return true; @@ -7309,6 +7351,8 @@ bool GenTree::OperSupportsOrderingSideEffect() const case GT_CMPXCHG: case GT_MEMORYBARRIER: case GT_CATCH_ARG: + case 
GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: case GT_SWIFT_ERROR: return true; default: @@ -8280,6 +8324,7 @@ GenTreeCall* Compiler::gtNewCallNode(gtCallTypes callType, node->gtRetClsHnd = nullptr; node->gtControlExpr = nullptr; node->gtCallMoreFlags = GTF_CALL_M_EMPTY; + node->gtIsAsyncCall = false; INDEBUG(node->gtCallDebugFlags = GTF_CALL_MD_EMPTY); node->gtInlineInfoCount = 0; @@ -9422,6 +9467,7 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree) goto DONE; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_NO_OP: case GT_NOP: case GT_LABEL: @@ -9892,8 +9938,9 @@ GenTreeCall* Compiler::gtCloneExprCallHelper(GenTreeCall* tree) copy->gtLateDevirtualizationInfo = tree->gtLateDevirtualizationInfo; - copy->gtCallType = tree->gtCallType; - copy->gtReturnType = tree->gtReturnType; + copy->gtIsAsyncCall = tree->gtIsAsyncCall; + copy->gtCallType = tree->gtCallType; + copy->gtReturnType = tree->gtReturnType; #if FEATURE_MULTIREG_RET copy->gtReturnTypeDesc = tree->gtReturnTypeDesc; @@ -10163,6 +10210,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -10234,6 +10282,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_PUTARG_SPLIT: #endif // FEATURE_ARG_SPLIT case GT_RETURNTRAP: + case GT_RETURN_SUSPEND: m_edge = &m_node->AsUnOp()->gtOp1; assert(*m_edge != nullptr); m_advance = &GenTreeUseEdgeIterator::Terminate; @@ -11773,6 +11822,10 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons { ilName = "this"; } + else if (lclNum == lvaAsyncContinuationArg) + { + ilName = "AsyncCont"; + } else { ilKind = "arg"; @@ -12333,6 +12386,7 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack) case GT_START_PREEMPTGC: case GT_PROF_HOOK: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_MEMORYBARRIER: case GT_PINVOKE_PROLOG: case GT_JMPTABLE: @@ -13075,6 
+13129,8 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg) return "va cookie"; case WellKnownArg::InstParam: return "gctx"; + case WellKnownArg::AsyncContinuation: + return "async"; case WellKnownArg::RetBuffer: return "retbuf"; case WellKnownArg::PInvokeFrame: diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index f3fb94b09429e8..ed4f5331e42eec 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4563,6 +4563,7 @@ enum class WellKnownArg : unsigned ThisPointer, VarArgsCookie, InstParam, + AsyncContinuation, RetBuffer, PInvokeFrame, WrapperDelegateCell, @@ -4741,6 +4742,7 @@ class CallArgs #endif bool m_hasThisPointer : 1; bool m_hasRetBuffer : 1; + bool m_hasAsyncContinuation : 1; bool m_isVarArgs : 1; bool m_abiInformationDetermined : 1; bool m_hasAddedFinalArgs : 1; @@ -4788,6 +4790,7 @@ class CallArgs CallArg* InsertAfter(Compiler* comp, CallArg* after, const NewCallArg& arg); CallArg* InsertAfterUnchecked(Compiler* comp, CallArg* after, const NewCallArg& arg); CallArg* InsertInstParam(Compiler* comp, GenTree* node); + CallArg* InsertAsyncContinuationParam(Compiler* comp, GenTree* node); CallArg* InsertAfterThisOrFirst(Compiler* comp, const NewCallArg& arg); void PushLateBack(CallArg* arg); void Remove(CallArg* arg); @@ -4815,6 +4818,7 @@ class CallArgs // clang-format off bool HasThisPointer() const { return m_hasThisPointer; } bool HasRetBuffer() const { return m_hasRetBuffer; } + bool HasAsyncContinuation() const { return m_hasAsyncContinuation; } bool IsVarArgs() const { return m_isVarArgs; } void SetIsVarArgs() { m_isVarArgs = true; } void ClearIsVarArgs() { m_isVarArgs = false; } @@ -5019,6 +5023,8 @@ struct GenTreeCall final : public GenTree #endif } + bool IsAsync() const; + //--------------------------------------------------------------------------- // GetRegNumByIdx: get i'th return register allocated to this call node. 
// @@ -5588,6 +5594,7 @@ struct GenTreeCall final : public GenTree var_types gtReturnType : 5; // exact return type uint8_t gtInlineInfoCount; // number of inline candidates for the given call + bool gtIsAsyncCall; CORINFO_CLASS_HANDLE gtRetClsHnd; // The return type handle of the call if it is a struct; always available union diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 7a13212fa6eaf3..1be598168fb95a 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -33,6 +33,7 @@ GTNODE(LCL_ADDR , GenTreeLclFld ,0,0,GTK_LEAF) // local //----------------------------------------------------------------------------- GTNODE(CATCH_ARG , GenTree ,0,0,GTK_LEAF) // Exception object in a catch block +GTNODE(ASYNC_CONTINUATION, GenTree ,0,0,GTK_LEAF) // Access returned continuation by an async call GTNODE(LABEL , GenTree ,0,0,GTK_LEAF) // Jump-target GTNODE(JMP , GenTreeVal ,0,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function GTNODE(FTN_ADDR , GenTreeFptrVal ,0,0,GTK_LEAF) // Address of a function @@ -290,6 +291,11 @@ GTNODE(RETURN , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) GTNODE(SWITCH , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) GTNODE(NO_OP , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE) // A NOP that cannot be deleted. +// Suspend an async method, returning a continuation. +// Before lowering this is a seemingly normal TYP_VOID node with a lot of side effects (GTF_CALL | GTF_GLOB_REF | GTF_ORDER_SIDEEFF). +// Lowering then removes all successor nodes and leaves it as the terminator node. +GTNODE(RETURN_SUSPEND , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) // Return a continuation in an async method + GTNODE(START_NONGC , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Starts a new instruction group that will be non-gc interruptible. GTNODE(START_PREEMPTGC , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Starts a new instruction group where preemptive GC is enabled. 
GTNODE(PROF_HOOK , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Profiler Enter/Leave/TailCall hook. diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 0f609731675632..e1ea136c7152e4 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -846,7 +846,8 @@ GenTree* Compiler::impStoreStruct(GenTree* store, // Make sure we don't pass something other than a local address to the return buffer arg. // It is allowed to pass current's method return buffer as it is a local too. - if (fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(srcCall->gtRetClsHnd)) + if ((fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(srcCall->gtRetClsHnd)) || + (compIsAsync() && !destAddr->OperIs(GT_LCL_ADDR))) { unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer")); lvaSetStruct(tmp, srcCall->gtRetClsHnd, false); @@ -972,7 +973,8 @@ GenTree* Compiler::impStoreStruct(GenTree* store, // Make sure we don't pass something other than a local address to the return buffer arg. // It is allowed to pass current's method return buffer as it is a local too. 
- if (fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(call->gtRetClsHnd)) + if ((fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(call->gtRetClsHnd)) || + (compIsAsync() && !destAddr->OperIs(GT_LCL_ADDR))) { unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer")); lvaSetStruct(tmp, call->gtRetClsHnd, false); @@ -5970,6 +5972,85 @@ bool Compiler::impBlockIsInALoop(BasicBlock* block) block->HasFlag(BBF_BACKWARD_JUMP); } +//------------------------------------------------------------------------ +// impMatchAwaitPattern: check if a method call starts an Await pattern +// that can be optimized for runtime async +// +// Arguments: +// codeAddr - IL after call[virt] +// codeEndp - End of IL code stream +// configVal - [out] set to 0 or 1, accordingly, if we saw ConfigureAwait(0|1) +// +// Returns: +// true if this is an Await that we can optimize +// +bool Compiler::impMatchAwaitPattern(const BYTE* codeAddr, const BYTE* codeEndp, int* configVal) +{ + // If we see the following code pattern in runtime async methods: + // + // call[virt] + // [ OPTIONAL ] + // ldc.i4.0 / ldc.i4.1 + // call[virt] + // call + // + // We emit an equivalent of: + // + // call[virt] + // + // where "RtMethod" is the runtime-async counterpart of a Task-returning method. + // + // NOTE: we could potentially check if Method is not a thunk and, in cases when we can tell, + // bypass this optimization. Otherwise in a non-thunk case we would be + // replacing the pattern with a call to a thunk, which contains roughly the same code.
+ + const BYTE* nextOpcode = codeAddr + sizeof(mdToken); + // There must be enough space after ldc for {call + tk + call + tk} + if (nextOpcode + 2 * (1 + sizeof(mdToken)) < codeEndp) + { + uint8_t nextOp = getU1LittleEndian(nextOpcode); + uint8_t nextNextOp = getU1LittleEndian(nextOpcode + 1); + if ((nextOp != CEE_LDC_I4_0 && nextOp != CEE_LDC_I4_1) || + (nextNextOp != CEE_CALL && nextNextOp != CEE_CALLVIRT)) + { + goto checkForAwait; + } + + // check if the token after {ldc, call[virt]} is ConfigureAwait + CORINFO_RESOLVED_TOKEN nextCallTok; + impResolveToken(nextOpcode + 2, &nextCallTok, CORINFO_TOKENKIND_Method); + + if (!eeIsIntrinsic(nextCallTok.hMethod) || + lookupNamedIntrinsic(nextCallTok.hMethod) != NI_System_Threading_Tasks_Task_ConfigureAwait) + { + goto checkForAwait; + } + + *configVal = nextOp == CEE_LDC_I4_0 ? 0 : 1; + // skip {ldc; call; } + nextOpcode += 1 + 1 + sizeof(mdToken); + } + +checkForAwait: + + if ((nextOpcode + sizeof(mdToken) < codeEndp) && (getU1LittleEndian(nextOpcode) == CEE_CALL)) + { + // resolve the next token + CORINFO_RESOLVED_TOKEN nextCallTok; + impResolveToken(nextOpcode + 1, &nextCallTok, CORINFO_TOKENKIND_Method); + + // check if it is an Await intrinsic + if (eeIsIntrinsic(nextCallTok.hMethod) && + lookupNamedIntrinsic(nextCallTok.hMethod) == NI_System_Runtime_CompilerServices_RuntimeHelpers_Await) + { + // yes, this is an Await + return true; + } + } + + return false; + } + #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function @@ -8986,7 +9067,41 @@ void Compiler::impImportBlockCode(BasicBlock* block) // many other places. We unfortunately embed that knowledge here. if (opcode != CEE_CALLI) { - _impResolveToken(CORINFO_TOKENKIND_Method); + bool isAwait = false; + // TODO: The configVal should be wired to the actual implementation + // that controls the flow of sync context. + // We do not have that yet.
+ int configVal = -1; // -1 not configured, 0/1 configured to false/true + if (compIsAsync() && JitConfig.JitOptimizeAwait()) + { + isAwait = impMatchAwaitPattern(codeAddr, codeEndp, &configVal); + } + + if (isAwait) + { + _impResolveToken(CORINFO_TOKENKIND_Await); + if (resolvedToken.hMethod != NULL) + { + // There is a runtime async variant that is implicitly awaitable, just call that. + // if configured, skip {ldc call ConfigureAwait} + if (configVal >= 0) + codeAddr += 2 + sizeof(mdToken); + + // Skip the call to `Await` + codeAddr += 1 + sizeof(mdToken); + } + else + { + // This can happen in rare cases when the Task-returning method is not a runtime Async + // function. For example "T M1(T arg) => arg" when called with a Task argument. Treat + // that as a regular call that is Awaited + _impResolveToken(CORINFO_TOKENKIND_Method); + } + } + else + { + _impResolveToken(CORINFO_TOKENKIND_Method); + } eeGetCallInfo(&resolvedToken, (prefixFlags & PREFIX_CONSTRAINED) ? &constrainedResolvedToken : nullptr, @@ -13273,7 +13388,8 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) switch (arg.GetWellKnownArg()) { case WellKnownArg::RetBuffer: - // This does not appear in the table of inline arg info; do not include them + case WellKnownArg::AsyncContinuation: + // These do not appear in the table of inline arg info; do not include them continue; case WellKnownArg::InstParam: pInlineInfo->inlInstParamArgInfo = argInfo = new (this, CMK_Inlining) InlArgInfo{}; @@ -13850,7 +13966,7 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l // if it is a struct, because it requires some additional handling.
if ((!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef && - !argInfo.argHasCallerLocalRef)) + !argInfo.argHasCallerLocalRef && !argInfo.argIsByRefToStructLocal)) { /* Get a *LARGE* LCL_VAR node */ op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp)); diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 208f6212cbd5c9..5aeff1b0258987 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -96,7 +96,8 @@ var_types Compiler::impImportCall(OPCODE opcode, bool bIntrinsicImported = false; CORINFO_SIG_INFO calliSig; - NewCallArg extraArg; + GenTree* varArgsCookie = nullptr; + GenTree* instParam = nullptr; // Swift calls that might throw use a SwiftError* arg that requires additional IR to handle, // so if we're importing a Swift call, look for this type in the signature @@ -711,12 +712,15 @@ var_types Compiler::impImportCall(OPCODE opcode, } } - /*------------------------------------------------------------------------- - * Create the argument list - */ + if (sig->isAsyncCall()) + { + call->AsCall()->gtIsAsyncCall = true; + } + + // Now create the argument list. 
//------------------------------------------------------------------------- - // Special case - for varargs we have an implicit last argument + // Special case - for varargs we have an extra argument if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG) { @@ -729,9 +733,7 @@ var_types Compiler::impImportCall(OPCODE opcode, varCookie = info.compCompHnd->getVarArgsHandle(sig, &pVarCookie); assert((!varCookie) != (!pVarCookie)); - GenTree* cookieNode = gtNewIconEmbHndNode(varCookie, pVarCookie, GTF_ICON_VARG_HDL, sig); - assert(extraArg.Node == nullptr); - extraArg = NewCallArg::Primitive(cookieNode).WellKnown(WellKnownArg::VarArgsCookie); + varArgsCookie = gtNewIconEmbHndNode(varCookie, pVarCookie, GTF_ICON_VARG_HDL, sig); } //------------------------------------------------------------------------- @@ -751,7 +753,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // We also set the exact type context associated with the call so we can // inline the call correctly later on. 
- if (sig->callConv & CORINFO_CALLCONV_PARAMTYPE) + if (sig->hasTypeArg()) { assert(call->AsCall()->gtCallType == CT_USER_FUNC); if (clsHnd == nullptr) @@ -761,8 +763,7 @@ var_types Compiler::impImportCall(OPCODE opcode, assert(opcode != CEE_CALLI); - GenTree* instParam; - bool runtimeLookup; + bool runtimeLookup; // Instantiated generic method if (((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_METHOD) @@ -852,9 +853,6 @@ var_types Compiler::impImportCall(OPCODE opcode, } } } - - assert(extraArg.Node == nullptr); - extraArg = NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam); } if ((opcode == CEE_NEWOBJ) && ((clsFlags & CORINFO_FLG_DELEGATE) != 0)) @@ -890,18 +888,50 @@ var_types Compiler::impImportCall(OPCODE opcode, } impPopCallArgs(sig, call->AsCall()); - if (extraArg.Node != nullptr) + + // Extra args + if ((instParam != nullptr) || call->AsCall()->IsAsync() || (varArgsCookie != nullptr)) { if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L) { - call->AsCall()->gtArgs.PushFront(this, extraArg); + if (varArgsCookie != nullptr) + { + call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(varArgsCookie) + .WellKnown(WellKnownArg::VarArgsCookie)); + } + + if (call->AsCall()->IsAsync()) + { + call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(gtNewNull(), TYP_REF) + .WellKnown(WellKnownArg::AsyncContinuation)); + } + + if (instParam != nullptr) + { + call->AsCall()->gtArgs.PushFront(this, + NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam)); + } } else { - call->AsCall()->gtArgs.PushBack(this, extraArg); - } + if (instParam != nullptr) + { + call->AsCall()->gtArgs.PushBack(this, + NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam)); + } - call->gtFlags |= extraArg.Node->gtFlags & GTF_GLOB_EFFECT; + if (call->AsCall()->IsAsync()) + { + call->AsCall()->gtArgs.PushBack(this, NewCallArg::Primitive(gtNewNull(), TYP_REF) + .WellKnown(WellKnownArg::AsyncContinuation)); + } + 
+ if (varArgsCookie != nullptr) + { + call->AsCall()->gtArgs.PushBack(this, NewCallArg::Primitive(varArgsCookie) + .WellKnown(WellKnownArg::VarArgsCookie)); + } + } } //------------------------------------------------------------------------- @@ -3303,6 +3333,32 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, return new (this, GT_LABEL) GenTree(GT_LABEL, TYP_I_IMPL); } + if (ni == NI_System_StubHelpers_AsyncCallContinuation) + { + GenTree* node = new (this, GT_ASYNC_CONTINUATION) GenTree(GT_ASYNC_CONTINUATION, TYP_REF); + node->SetHasOrderingSideEffect(); + node->gtFlags |= GTF_CALL | GTF_GLOB_REF; + info.compUsesAsyncContinuation = true; + return node; + } + + if (ni == NI_System_Runtime_CompilerServices_RuntimeHelpers_AsyncSuspend) + { + GenTree* node = gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, impPopStack().val); + node->SetHasOrderingSideEffect(); + node->gtFlags |= GTF_CALL | GTF_GLOB_REF; + return node; + } + + if (ni == NI_System_Runtime_CompilerServices_RuntimeHelpers_Await) + { + // These are marked intrinsics simply to match them by name in + // the Await pattern optimization. Make sure we keep pIntrinsicName assigned + // (it would be overridden if we left this up to the rest of this function). 
+ *pIntrinsicName = ni; + return nullptr; + } + bool betterToExpand = false; // Allow some lightweight intrinsics in Tier0 which can improve throughput @@ -10962,6 +11018,19 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_Runtime_CompilerServices_RuntimeHelpers_GetMethodTable; } + else if (strcmp(methodName, "Await") == 0) + { + result = NI_System_Runtime_CompilerServices_RuntimeHelpers_Await; + } + else if (strcmp(methodName, "AsyncSuspend") == 0) + { + result = NI_System_Runtime_CompilerServices_RuntimeHelpers_AsyncSuspend; + } + else if (strcmp(methodName, "get_RuntimeAsyncViaJitGeneratedStateMachines") == 0) + { + result = + NI_System_Runtime_CompilerServices_RuntimeHelpers_get_RuntimeAsyncViaJitGeneratedStateMachines; + } } else if (strcmp(className, "StaticsHelpers") == 0) { @@ -11211,6 +11280,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_StubHelpers_NextCallReturnAddress; } + else if (strcmp(methodName, "AsyncCallContinuation") == 0) + { + result = NI_System_StubHelpers_AsyncCallContinuation; + } } } else if (strcmp(namespaceName, "Text") == 0) @@ -11288,6 +11361,17 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } } + else if (strcmp(namespaceName, "Threading.Tasks") == 0) + { + if (strcmp(methodName, "ConfigureAwait") == 0) + { + if (strcmp(className, "Task`1") == 0 || strcmp(className, "Task") == 0 || + strcmp(className, "ValueTask`1") == 0 || strcmp(className, "ValueTask") == 0) + { + result = NI_System_Threading_Tasks_Task_ConfigureAwait; + } + } + } } } else if (strcmp(namespaceName, "Internal.Runtime") == 0) diff --git a/src/coreclr/jit/inline.def b/src/coreclr/jit/inline.def index 44d6e83929e0ba..526331254c56d1 100644 --- a/src/coreclr/jit/inline.def +++ b/src/coreclr/jit/inline.def @@ -115,6 +115,7 @@ INLINE_OBSERVATION(UNSUPPORTED_OPCODE, bool, "unsupported opcode", INLINE_OBSERVATION(DEBUG_CODEGEN, bool,
"debug codegen", FATAL, CALLER) INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoInlineRange", FATAL, CALLER) INLINE_OBSERVATION(USES_NEXT_CALL_RET_ADDR, bool, "uses NextCallReturnAddress intrinsic", FATAL, CALLER) +INLINE_OBSERVATION(ASYNC_USED_CONTINUATION, bool, "uses AsyncCallContinuation intrinsic", FATAL, CALLER) // ------ Caller Information ------- diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 2c9231cb47eb24..38a791201bc105 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -594,6 +594,8 @@ OPT_CONFIG_INTEGER(JitDoIfConversion, "JitDoIfConversion", 1) OPT_CONFIG_INTEGER(JitDoOptimizeMaskConversions, "JitDoOptimizeMaskConversions", 1) // Perform optimization of mask // conversions +RELEASE_CONFIG_INTEGER(JitOptimizeAwait, "JitOptimizeAwait", 1) // Perform optimization of Await intrinsics + RELEASE_CONFIG_INTEGER(JitEnableOptRepeat, "JitEnableOptRepeat", 1) // If zero, do not allow JitOptRepeat RELEASE_CONFIG_METHODSET(JitOptRepeat, "JitOptRepeat") // Runs optimizer multiple times on specified methods RELEASE_CONFIG_INTEGER(JitOptRepeatCount, "JitOptRepeatCount", 2) // Number of times to repeat opts when repeating diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index d3a37a2c4d40c2..80d3640a3bbb4c 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -43,7 +43,7 @@ class JitFlags JIT_FLAG_RELATIVE_CODE_RELOCS = 29, // JIT should generate PC-relative address computations instead of EE relocation records JIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif - + JIT_FLAG_ASYNC = 31, // Generate code for use as an async function // Note: the mcs tool uses the currently unused upper flags bits when outputting SuperPMI MC file flags. // See EXTRA_JIT_FLAGS and spmidumphelper.cpp. Currently, these are bits 56 through 63. If they overlap, // something needs to change. 
diff --git a/src/coreclr/jit/layout.cpp b/src/coreclr/jit/layout.cpp index 1160300ad6698c..a1ad460435caa7 100644 --- a/src/coreclr/jit/layout.cpp +++ b/src/coreclr/jit/layout.cpp @@ -567,6 +567,31 @@ ClassLayout* ClassLayout::Create(Compiler* compiler, const ClassLayoutBuilder& b return newLayout; } +//------------------------------------------------------------------------ +// HasGCByRef: // Check if this classlayout has a TYP_BYREF GC pointer in it. +// +// Return value: +// True if so. +// +bool ClassLayout::HasGCByRef() const +{ + if (!HasGCPtr()) + { + return false; + } + + unsigned numSlots = GetSlotCount(); + for (unsigned i = 0; i < numSlots; i++) + { + if (GetGCPtrType(i) == TYP_BYREF) + { + return true; + } + } + + return false; +} + //------------------------------------------------------------------------ // IsStackOnly: does the layout represent a block that can never be on the heap? // diff --git a/src/coreclr/jit/layout.h b/src/coreclr/jit/layout.h index 1f5918840057a4..ca367d5fb56307 100644 --- a/src/coreclr/jit/layout.h +++ b/src/coreclr/jit/layout.h @@ -224,6 +224,8 @@ class ClassLayout return m_gcPtrCount != 0; } + bool HasGCByRef() const; + bool IsStackOnly(Compiler* comp) const; bool IsGCPtr(unsigned slot) const diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 29d5e07e7d3d47..bbd3fce509f652 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -36,19 +36,19 @@ void Compiler::lvaInitTypeRef() { /* x86 args look something like this: - [this ptr] [hidden return buffer] [declared arguments]* [generic context] [var arg cookie] + [this ptr] [hidden return buffer] [declared arguments]* [generic context] [async continuation] [var arg cookie] x64 is closer to the native ABI: - [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]* + [this ptr] [hidden return buffer] [generic context] [async continuation] [var arg cookie] [declared arguments]* (Note: prior to .NET 
Framework 4.5.1 for Windows 8.1 (but not .NET Framework 4.5.1 "downlevel"), the "hidden return buffer" came before the "this ptr". Now, the "this ptr" comes first. This is different from the C++ order, where the "hidden return buffer" always comes first.) ARM and ARM64 are the same as the current x64 convention: - [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]* + [this ptr] [hidden return buffer] [generic context] [async continuation] [var arg cookie] [declared arguments]* Key difference: - The var arg cookie and generic context are swapped with respect to the user arguments + The var arg cookie, generic context and async continuations are swapped with respect to the user arguments */ /* Set compArgsCount and compLocalsCount */ @@ -161,6 +161,11 @@ void Compiler::lvaInitTypeRef() info.compTypeCtxtArg = BAD_VAR_NUM; } + if (compIsAsync()) + { + info.compArgsCount++; + } + lvaCount = info.compLocalsCount = info.compArgsCount + info.compMethodInfo->locals.numArgs; info.compILlocalsCount = info.compILargsCount + info.compMethodInfo->locals.numArgs; @@ -371,6 +376,8 @@ void Compiler::lvaInitArgs(bool hasRetBuffArg) // and shared generic struct instance methods lvaInitGenericsCtxt(&varNum); + lvaInitAsyncContinuation(&varNum); + /* If the method is varargs, process the varargs cookie */ lvaInitVarArgsHandle(&varNum); #endif @@ -384,6 +391,8 @@ void Compiler::lvaInitArgs(bool hasRetBuffArg) // and shared generic struct instance methods lvaInitGenericsCtxt(&varNum); + lvaInitAsyncContinuation(&varNum); + /* If the method is varargs, process the varargs cookie */ lvaInitVarArgsHandle(&varNum); #endif @@ -676,6 +685,33 @@ void Compiler::lvaInitGenericsCtxt(unsigned* curVarNum) (*curVarNum)++; } +//----------------------------------------------------------------------------- +// lvaInitAsyncContinuation: +// Initialize the async continuation parameter. 
+// +// Arguments: +// curVarNum - [in, out] The current local variable number for parameters +// +void Compiler::lvaInitAsyncContinuation(unsigned* curVarNum) +{ + if (!compIsAsync()) + { + return; + } + + lvaAsyncContinuationArg = *curVarNum; + LclVarDsc* varDsc = lvaGetDesc(*curVarNum); + varDsc->lvType = TYP_REF; + varDsc->lvIsParam = true; + + // The final home for this incoming register might be our local stack frame + varDsc->lvOnFrame = true; + + INDEBUG(varDsc->lvReason = "Async continuation arg"); + + (*curVarNum)++; +} + /*****************************************************************************/ void Compiler::lvaInitVarArgsHandle(unsigned* curVarNum) { diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index a3271e832fa8de..99d011ea32e9bc 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -284,6 +284,20 @@ class LIR final void InsertAtBeginning(Range&& range); void InsertAtEnd(Range&& range); + template <typename... Trees> + void InsertAtBeginning(GenTree* tree, Trees&&... rest) + { + InsertAtBeginning(std::forward<Trees>(rest)...); + InsertAtBeginning(tree); + } + + template <typename... Trees> + void InsertAtEnd(GenTree* tree, Trees&&...
rest) + { + InsertAtEnd(tree); + InsertAtEnd(std::forward(rest)...); + } + void Remove(GenTree* node, bool markOperandsUnused = false); Range Remove(GenTree* firstNode, GenTree* lastNode); Range Remove(ReadOnlyRange&& range); diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index b199d9b6612509..f1c7c9a1d1ef9e 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -1454,6 +1454,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR case GT_JCC: case GT_JTRUE: case GT_RETURN: + case GT_RETURN_SUSPEND: case GT_SWITCH: case GT_RETFILT: case GT_START_NONGC: diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 9784dd6d55bd5d..314f1580d39609 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -732,6 +732,13 @@ GenTree* Lowering::LowerNode(GenTree* node) return LowerArrLength(node->AsArrCommon()); break; + case GT_ASYNC_CONTINUATION: + return LowerAsyncContinuation(node); + + case GT_RETURN_SUSPEND: + LowerReturnSuspend(node); + break; + default: break; } @@ -5410,6 +5417,72 @@ void Lowering::LowerRetSingleRegStructLclVar(GenTreeUnOp* ret) } } +//---------------------------------------------------------------------------------------------- +// LowerAsyncContinuation: Lower a GT_ASYNC_CONTINUATION node +// +// Arguments: +// asyncCont - Async continuation node +// +// Returns: +// Next node to lower. +// +GenTree* Lowering::LowerAsyncContinuation(GenTree* asyncCont) +{ + assert(asyncCont->OperIs(GT_ASYNC_CONTINUATION)); + + GenTree* next = asyncCont->gtNext; + + // When the ASYNC_CONTINUATION was created as a result of the + // AsyncCallContinuation() intrinsic the previous call hasn't been marked + // as an async call. We need to do that to get the right GC reporting + // behavior for the returned async continuation. Furthermore, we ensure the + // async continuation follows the call to simplify marking the registers + // busy in LSRA. 
+ GenTree* node = asyncCont; + while (true) + { + node = node->gtPrev; + noway_assert((node != nullptr) && "Ran out of nodes while looking for call before async continuation"); + + if (node->IsCall()) + { + if (!node->AsCall()->IsAsync()) + { + JITDUMP("Marking the call [%06u] before async continuation [%06u] as an async call\n", + Compiler::dspTreeID(node), Compiler::dspTreeID(asyncCont)); + node->AsCall()->gtIsAsyncCall = true; + } + + BlockRange().Remove(asyncCont); + BlockRange().InsertAfter(node, asyncCont); + break; + } + } + + return next; +} + +//---------------------------------------------------------------------------------------------- +// LowerReturnSuspend: +// Lower a GT_RETURN_SUSPEND by making it a terminator node. +// +// Arguments: +// node - The node +// +void Lowering::LowerReturnSuspend(GenTree* node) +{ + assert(node->OperIs(GT_RETURN_SUSPEND)); + while (BlockRange().LastNode() != node) + { + BlockRange().Remove(BlockRange().LastNode(), true); + } + + if (comp->compMethodRequiresPInvokeFrame()) + { + InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(node)); + } +} + //---------------------------------------------------------------------------------------------- // LowerCallStruct: Lowers a call node that returns a struct. 
// diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 388e136f3f7812..b0adf251eec29b 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -180,6 +180,8 @@ class Lowering final : public Phase GenTree* LowerStoreLocCommon(GenTreeLclVarCommon* lclVar); void LowerRetStruct(GenTreeUnOp* ret); void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret); + GenTree* LowerAsyncContinuation(GenTree* asyncCont); + void LowerReturnSuspend(GenTree* retSuspend); void LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList); bool IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList); void LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 2dcdc5db3f79fa..2a547b11b25649 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1030,7 +1030,7 @@ class LinearScan : public LinearScanInterface // insert refpositions representing prolog zero-inits which will be added later void insertZeroInitRefPositions(); - void addKillForRegs(regMaskTP mask, LsraLocation currentLoc); + RefPosition* addKillForRegs(regMaskTP mask, LsraLocation currentLoc); void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); @@ -2014,6 +2014,7 @@ class LinearScan : public LinearScanInterface int BuildPutArgReg(GenTreeUnOp* node); int BuildCall(GenTreeCall* call); void MarkSwiftErrorBusyForCall(GenTreeCall* call); + void MarkAsyncContinuationBusyForCall(GenTreeCall* call); int BuildCmp(GenTree* tree); int BuildCmpOperands(GenTree* tree); int BuildBlockStore(GenTreeBlk* blkNode); diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 815f0149aede11..8b7e2a9e086dcd 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -631,6 +631,12 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + assert(dstCount == 1); + 
BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_COPY: srcCount = 1; #ifdef TARGET_ARM @@ -693,6 +699,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_JCC: case GT_SETCC: case GT_MEMORYBARRIER: + case GT_RETURN_SUSPEND: srcCount = BuildSimple(tree); break; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index e79748f8555673..7baaef17ac484d 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1320,6 +1320,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index b3c6c7d4cf788d..13fc2bd1ede079 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -274,6 +274,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b6b01ca38e63f8..28c2bea643bff2 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -686,7 +686,7 @@ bool LinearScan::isContainableMemoryOp(GenTree* node) // mask - the mask (set) of registers. // currentLoc - the location at which they should be added // -void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) +RefPosition* LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) { // The mask identifies a set of registers that will be used during // codegen. 
Mark these as modified here, so when we do final frame @@ -705,6 +705,8 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) *killTail = pos; killTail = &pos->nextRefPosition; + + return pos; } //------------------------------------------------------------------------ @@ -4741,3 +4743,24 @@ void LinearScan::MarkSwiftErrorBusyForCall(GenTreeCall* call) setDelayFree(swiftErrorRegRecord->lastRefPosition); } #endif + +//------------------------------------------------------------------------ +// MarkAsyncContinuationBusyForCall: +// Add a ref position that marks the async continuation register as busy +// until it is killed. +// +// Arguments: +// call - The call node +// +void LinearScan::MarkAsyncContinuationBusyForCall(GenTreeCall* call) +{ + // We model the async continuation like the swift error register: we ensure + // the node follows the call in lowering, and make it delay freed to ensure + // nothing is allocated into the register between the call and + // ASYNC_CONTINUATION node. We need to add a kill here in the right spot as + // not all targets may naturally have one created. + assert(call->gtNext != nullptr); + assert(call->gtNext->OperIs(GT_ASYNC_CONTINUATION)); + RefPosition* refPos = addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc + 1); + setDelayFree(refPos); +} diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 529e6d8127b670..b1f699c95ce734 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -563,6 +563,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -782,6 +787,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. 
+ if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 5428e6b5c8860c..764c89f4b00f26 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -756,6 +756,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -987,6 +992,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 6f92d25d2b23ed..2a7e39be76e9ee 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -627,6 +627,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: srcCount = 0; @@ -1358,6 +1363,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. 
+ if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 79b9d2743c9375..b0483d39891f8f 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -189,6 +189,7 @@ GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, bool morphAr call->gtCallMoreFlags = GTF_CALL_M_EMPTY; INDEBUG(call->gtCallDebugFlags = GTF_CALL_MD_EMPTY); call->gtControlExpr = nullptr; + call->gtIsAsyncCall = false; call->ClearInlineInfo(); #ifdef UNIX_X86_ABI call->gtFlags |= GTF_CALL_POP_ARGS; @@ -709,6 +710,8 @@ const char* getWellKnownArgName(WellKnownArg arg) return "VarArgsCookie"; case WellKnownArg::InstParam: return "InstParam"; + case WellKnownArg::AsyncContinuation: + return "AsyncContinuation"; case WellKnownArg::RetBuffer: return "RetBuffer"; case WellKnownArg::PInvokeFrame: @@ -4490,6 +4493,12 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } #endif + if (compIsAsync() != call->IsAsync()) + { + failTailCall("Caller and callee do not agree on async-ness"); + return nullptr; + } + // We have to ensure to pass the incoming retValBuf as the // outgoing one. Using a temp will not do as this function will // not regain control to do the copy. 
This can happen when inlining diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index ddee451a200bea..a09a54b0a016cb 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -103,6 +103,7 @@ enum NamedIntrinsic : unsigned short NI_System_RuntimeType_get_TypeHandle, NI_System_StubHelpers_GetStubContext, NI_System_StubHelpers_NextCallReturnAddress, + NI_System_StubHelpers_AsyncCallContinuation, NI_Array_Address, NI_Array_Get, @@ -118,6 +119,9 @@ enum NamedIntrinsic : unsigned short NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant, NI_System_Runtime_CompilerServices_RuntimeHelpers_IsReferenceOrContainsReferences, NI_System_Runtime_CompilerServices_RuntimeHelpers_GetMethodTable, + NI_System_Runtime_CompilerServices_RuntimeHelpers_Await, + NI_System_Runtime_CompilerServices_RuntimeHelpers_AsyncSuspend, + NI_System_Runtime_CompilerServices_RuntimeHelpers_get_RuntimeAsyncViaJitGeneratedStateMachines, NI_System_Runtime_CompilerServices_StaticsHelpers_VolatileReadAsByref, @@ -150,6 +154,8 @@ enum NamedIntrinsic : unsigned short NI_System_Threading_Interlocked_ExchangeAdd, NI_System_Threading_Interlocked_MemoryBarrier, + NI_System_Threading_Tasks_Task_ConfigureAwait, + // These two are special marker IDs so that we still get the inlining profitability boost NI_System_Numerics_Intrinsic, NI_System_Runtime_Intrinsics_Intrinsic, diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 9afb109802e85f..711e1d91e9a109 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -999,6 +999,11 @@ void Compiler::optValnumCSE_InitDataFlow() } } + if (compIsAsync()) + { + optValnumCSE_SetUpAsyncByrefKills(); + } + for (BasicBlock* const block : Blocks()) { // If the block doesn't contains a call then skip it... 
@@ -1082,6 +1087,112 @@ void Compiler::optValnumCSE_InitDataFlow() #endif // DEBUG } +//--------------------------------------------------------------------------- +// optValnumCSE_SetUpAsyncByrefKills: +// Compute kills because of async calls requiring byrefs not to be live +// across them. +// +void Compiler::optValnumCSE_SetUpAsyncByrefKills() +{ + bool anyAsyncKills = false; + cseAsyncKillsMask = BitVecOps::MakeFull(cseLivenessTraits); + for (unsigned inx = 1; inx <= optCSECandidateCount; inx++) + { + CSEdsc* dsc = optCSEtab[inx - 1]; + assert(dsc->csdIndex == inx); + bool isByRef = false; + if (dsc->csdTree->TypeIs(TYP_BYREF)) + { + isByRef = true; + } + else if (dsc->csdTree->TypeIs(TYP_STRUCT)) + { + ClassLayout* layout = dsc->csdTree->GetLayout(this); + isByRef = layout->HasGCByRef(); + } + + if (isByRef) + { + // We generate a bit pattern like: 1111111100111100 where there + // are 0s only for the byref CSEs. + BitVecOps::RemoveElemD(cseLivenessTraits, cseAsyncKillsMask, getCSEAvailBit(inx)); + BitVecOps::RemoveElemD(cseLivenessTraits, cseAsyncKillsMask, getCSEAvailCrossCallBit(inx)); + anyAsyncKills = true; + } + } + + if (!anyAsyncKills) + { + return; + } + + for (BasicBlock* block : Blocks()) + { + Statement* asyncCallStmt = nullptr; + GenTree* asyncCall = nullptr; + // Find last async call in block + Statement* stmt = block->lastStmt(); + if (stmt == nullptr) + { + continue; + } + + while (asyncCall == nullptr) + { + if ((stmt->GetRootNode()->gtFlags & GTF_CALL) != 0) + { + for (GenTree* tree = stmt->GetRootNode(); tree != nullptr; tree = tree->gtPrev) + { + if (tree->IsCall() && tree->AsCall()->IsAsync()) + { + asyncCallStmt = stmt; + asyncCall = tree; + break; + } + } + } + + if (stmt == block->firstStmt()) + break; + + stmt = stmt->GetPrevStmt(); + } + + if (asyncCall == nullptr) + { + continue; + } + + // This block has a suspension point. Make all BYREF CSEs unavailable. 
+ BitVecOps::IntersectionD(cseLivenessTraits, block->bbCseGen, cseAsyncKillsMask); + BitVecOps::IntersectionD(cseLivenessTraits, block->bbCseOut, cseAsyncKillsMask); + + // Now make all byref CSEs after the suspension point available. + Statement* curStmt = asyncCallStmt; + GenTree* curTree = asyncCall; + while (true) + { + do + { + if (IS_CSE_INDEX(curTree->gtCSEnum)) + { + unsigned CSEnum = GET_CSE_INDEX(curTree->gtCSEnum); + BitVecOps::AddElemD(cseLivenessTraits, block->bbCseGen, getCSEAvailBit(CSEnum)); + BitVecOps::AddElemD(cseLivenessTraits, block->bbCseOut, getCSEAvailBit(CSEnum)); + } + + curTree = curTree->gtNext; + } while (curTree != nullptr); + + curStmt = curStmt->GetNextStmt(); + if (curStmt == nullptr) + break; + + curTree = curStmt->GetTreeList(); + } + } +} + /***************************************************************************** * * CSE Dataflow, so that all helper methods for dataflow are in a single place @@ -1577,7 +1688,7 @@ void Compiler::optValnumCSE_Availability() // kill all of the cseAvailCrossCallBit for each CSE whenever we see a GT_CALL (unless the call // generates a CSE). // - if (tree->OperGet() == GT_CALL) + if (tree->OperIs(GT_CALL)) { // Check for the common case of an already empty available_cses set // and thus nothing needs to be killed @@ -1595,6 +1706,12 @@ void Compiler::optValnumCSE_Availability() // BitVecOps::IntersectionD(cseLivenessTraits, available_cses, cseCallKillsMask); + // In async state machines, make all byref CSEs unavailable after suspension points. 
+ if (tree->AsCall()->IsAsync() && compIsAsync()) + { + BitVecOps::IntersectionD(cseLivenessTraits, available_cses, cseAsyncKillsMask); + } + if (isDef) { // We can have a GT_CALL that produces a CSE, diff --git a/src/coreclr/jit/patchpoint.cpp b/src/coreclr/jit/patchpoint.cpp index ab695c0a5c3bb1..96ff63be2b2ff4 100644 --- a/src/coreclr/jit/patchpoint.cpp +++ b/src/coreclr/jit/patchpoint.cpp @@ -235,11 +235,10 @@ class PatchpointTransformer // Add helper call // - // call PartialCompilationPatchpointHelper(ilOffset) + // call PatchpointForced(ilOffset) // GenTree* ilOffsetNode = compiler->gtNewIconNode(ilOffset, TYP_INT); - GenTreeCall* helperCall = - compiler->gtNewHelperCallNode(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, TYP_VOID, ilOffsetNode); + GenTreeCall* helperCall = compiler->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffsetNode); compiler->fgNewStmtAtEnd(block, helperCall); } diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index eaaf6fc4ccbc9d..e9c15f2b14805a 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -540,6 +540,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_RAX + #define REG_ASYNC_CONTINUATION_RET REG_RCX + #define RBM_ASYNC_CONTINUATION_RET RBM_RCX + // What sort of reloc do we use for [disp32] address mode #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_REL32 diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h index 95cb19a2291a49..cd3d29fafef33d 100644 --- a/src/coreclr/jit/targetarm.h +++ b/src/coreclr/jit/targetarm.h @@ -247,6 +247,9 @@ #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R0 + #define REG_ASYNC_CONTINUATION_RET REG_R2 + #define RBM_ASYNC_CONTINUATION_RET RBM_R2 + #define REG_FPBASE REG_R11 #define RBM_FPBASE RBM_R11 #define STR_FPBASE "r11" diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 
3e1dec49b4778a..678a05e181e40d 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -263,6 +263,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_R9 + #define REG_ASYNC_CONTINUATION_RET REG_R2 + #define RBM_ASYNC_CONTINUATION_RET RBM_R2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 452778c31963a0..d691f4c8fd1ec2 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -246,6 +246,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + #define REG_ASYNC_CONTINUATION_RET REG_A2 + #define RBM_ASYNC_CONTINUATION_RET RBM_A2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetriscv64.h b/src/coreclr/jit/targetriscv64.h index e5dcded3d878f5..ee6c6d22260c7c 100644 --- a/src/coreclr/jit/targetriscv64.h +++ b/src/coreclr/jit/targetriscv64.h @@ -222,6 +222,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + #define REG_ASYNC_CONTINUATION_RET REG_A2 + #define RBM_ASYNC_CONTINUATION_RET RBM_A2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index dd63766d631ac7..e630a1ae842120 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -294,6 +294,9 @@ #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~RBM_ECX) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_ECX + #define REG_ASYNC_CONTINUATION_RET REG_ECX + #define RBM_ASYNC_CONTINUATION_RET RBM_ECX + #define REG_FPBASE REG_EBP #define RBM_FPBASE RBM_EBP #define STR_FPBASE "ebp" diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index b474ace3df6b7e..53e89ff12aff11 100644 --- 
a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -10436,8 +10436,8 @@ static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memo GT_NOP, // These control-flow operations need no values. - GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE, - GT_SWIFT_ERROR_RET}; + GT_JTRUE, GT_RETURN, GT_RETURN_SUSPEND, GT_SWITCH, GT_RETFILT, + GT_CKFINITE, GT_SWIFT_ERROR_RET}; void ValueNumStore::ValidateValueNumStoreStatics() { @@ -12322,9 +12322,9 @@ void Compiler::fgValueNumberTree(GenTree* tree) break; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_SWIFT_ERROR: - // We know nothing about the value of a caught expression. - // We also know nothing about the error register's value post-Swift call. + // We know nothing about the value of these. tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); break; @@ -12694,6 +12694,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) case GT_SWITCH: case GT_RETURN: case GT_RETFILT: + case GT_RETURN_SUSPEND: case GT_NULLCHECK: if (tree->gtGetOp1() != nullptr) { From 075c19ddf45e658a5b257239c7b6039461d9fcb1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 13:10:42 +0200 Subject: [PATCH 02/21] Stub out JIT-EE changes --- src/coreclr/inc/corinfo.h | 38 ++++++++++++++++++++++++++++++++++ src/coreclr/jit/async.cpp | 18 ++++++++++++++-- src/coreclr/jit/compiler.h | 3 ++- src/coreclr/jit/importer.cpp | 2 ++ src/coreclr/jit/jitee.h | 1 - src/coreclr/jit/patchpoint.cpp | 2 ++ 6 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 08970ec4102e9b..527075b01634ff 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -991,6 +991,8 @@ struct CORINFO_SIG_INFO unsigned totalILArgs() { return (numArgs + (hasImplicitThis() ? 
1 : 0)); } bool isVarArg() { return ((getCallConv() == CORINFO_CALLCONV_VARARG) || (getCallConv() == CORINFO_CALLCONV_NATIVEVARARG)); } bool hasTypeArg() { return ((callConv & CORINFO_CALLCONV_PARAMTYPE) != 0); } + // TODO-Async: Unify with VM + bool isAsyncCall() { return false; } }; struct CORINFO_METHOD_INFO @@ -1688,6 +1690,42 @@ struct CORINFO_EE_INFO CORINFO_OS osType; }; +enum CorInfoContinuationFlags +{ + // Whether or not the continuation expects the result to be boxed and + // placed in the GCData array at index 0. Not set if the callee is void. + CORINFO_CONTINUATION_RESULT_IN_GCDATA = 1, + // If this bit is set the continuation resumes inside a try block and thus + // if an exception is being propagated, needs to be resumed. The exception + // should be placed at index 0 or 1 depending on whether the continuation + // also expects a result. + CORINFO_CONTINUATION_NEEDS_EXCEPTION = 2, + // If this bit is set the continuation has an OSR IL offset saved in the + // beginning of 'Data'. + CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA = 4, +}; + +struct CORINFO_ASYNC_INFO +{ + // Class handle for System.Runtime.CompilerServices.Continuation + CORINFO_CLASS_HANDLE continuationClsHnd; + // 'Next' field + CORINFO_FIELD_HANDLE continuationNextFldHnd; + // 'Resume' field + CORINFO_FIELD_HANDLE continuationResumeFldHnd; + // 'State' field + CORINFO_FIELD_HANDLE continuationStateFldHnd; + // 'Flags' field + CORINFO_FIELD_HANDLE continuationFlagsFldHnd; + // 'Data' field + CORINFO_FIELD_HANDLE continuationDataFldHnd; + // 'GCData' field + CORINFO_FIELD_HANDLE continuationGCDataFldHnd; + // Whether or not the continuation needs to be allocated through the + // helper that also takes a method handle + bool continuationsNeedMethodHandle; +}; + // Flags passed from JIT to runtime. 
enum CORINFO_GET_TAILCALL_HELPERS_FLAGS { diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 114386b675be3b..e1ec8fbabb93c0 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -273,7 +273,8 @@ PhaseStatus AsyncTransformation::Run() // Ask the VM to create a resumption stub for this specific version of the // code. It is stored in the continuation as a function pointer, so we need // the fixed entry point here. - m_resumeStub = m_comp->info.compCompHnd->getAsyncResumptionStub(); + // TODO: Get once VM changes are merged + // m_resumeStub = m_comp->info.compCompHnd->getAsyncResumptionStub(); m_comp->info.compCompHnd->getFunctionFixedEntryPoint(m_resumeStub, false, &m_resumeStubLookup); m_returnedContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("returned continuation")); @@ -281,7 +282,8 @@ PhaseStatus AsyncTransformation::Run() m_newContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("new continuation")); m_comp->lvaGetDesc(m_newContinuationVar)->lvType = TYP_REF; - m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); + // TODO-Async: Unify with VM + //m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); #ifdef JIT32_GCENCODER // Due to a hard cap on epilogs we need a shared return here. 
@@ -922,16 +924,22 @@ GenTreeCall* AsyncTransformation::CreateAllocContinuationCall(AsyncLiveness& lif if (methodHandleArg != nullptr) { + // TODO-Async: Unify with VM + const CorInfoHelpFunc CORINFO_HELP_ALLOC_CONTINUATION_METHOD = CORINFO_HELP_UNDEF; return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_METHOD, TYP_REF, prevContinuation, gcRefsCountNode, dataSizeNode, methodHandleArg); } if (classHandleArg != nullptr) { + // TODO-Async: Unify with VM + const CorInfoHelpFunc CORINFO_HELP_ALLOC_CONTINUATION_CLASS = CORINFO_HELP_UNDEF; return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_CLASS, TYP_REF, prevContinuation, gcRefsCountNode, dataSizeNode, classHandleArg); } + // TODO-Async: Unify with VM + const CorInfoHelpFunc CORINFO_HELP_ALLOC_CONTINUATION = CORINFO_HELP_UNDEF; return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION, TYP_REF, prevContinuation, gcRefsCountNode, dataSizeNode); } @@ -1430,6 +1438,9 @@ BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock* LIR::AsRange(resumeBB).InsertAtEnd(exception, null, neNull, jtrue); exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); + + // TODO-Async: Unify with VM + const CorInfoHelpFunc CORINFO_HELP_THROWEXACT = CORINFO_HELP_UNDEF; GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception); m_comp->compCurBB = rethrowExceptionBB; @@ -1906,6 +1917,9 @@ void AsyncTransformation::CreateResumptionSwitch() LIR::AsRange(checkILOffsetBB).InsertAtEnd(ilOffset, zero, geZero, jtrue); ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT); + + // TODO-Async: Unify with VM + const CorInfoHelpFunc CORINFO_HELP_PATCHPOINT_FORCED = CORINFO_HELP_UNDEF; GenTreeCall* callHelper = m_comp->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffset); callHelper->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 
2419e099502306..430b8600c5c2df 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10891,7 +10891,8 @@ class Compiler bool compIsAsync() const { - return opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ASYNC); + // TODO-Async: Unify with VM and introduce JIT_FLAG_ASYNC/CORJIT_FLAG_ASYNC + return false; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e1ea136c7152e4..67c207e57753cb 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -9079,6 +9079,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (isAwait) { + // TODO-Async: Unify with VM + const CorInfoTokenKind CORINFO_TOKENKIND_Await = (CorInfoTokenKind)0; _impResolveToken(CORINFO_TOKENKIND_Await); if (resolvedToken.hMethod != NULL) { diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index 80d3640a3bbb4c..5a4de5a5635fe8 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -43,7 +43,6 @@ class JitFlags JIT_FLAG_RELATIVE_CODE_RELOCS = 29, // JIT should generate PC-relative address computations instead of EE relocation records JIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif - JIT_FLAG_ASYNC = 31, // Generate code for use as an async function // Note: the mcs tool uses the currently unused upper flags bits when outputting SuperPMI MC file flags. // See EXTRA_JIT_FLAGS and spmidumphelper.cpp. Currently, these are bits 56 through 63. If they overlap, // something needs to change. 
diff --git a/src/coreclr/jit/patchpoint.cpp b/src/coreclr/jit/patchpoint.cpp index 96ff63be2b2ff4..b846c94d07b72a 100644 --- a/src/coreclr/jit/patchpoint.cpp +++ b/src/coreclr/jit/patchpoint.cpp @@ -238,6 +238,8 @@ class PatchpointTransformer // call PatchpointForced(ilOffset) // GenTree* ilOffsetNode = compiler->gtNewIconNode(ilOffset, TYP_INT); + // TODO: Unify with VM + const CorInfoHelpFunc CORINFO_HELP_PATCHPOINT_FORCED = CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT; GenTreeCall* helperCall = compiler->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffsetNode); compiler->fgNewStmtAtEnd(block, helperCall); From 4cec8cb576d49c744cba54482447b896bc0b05f5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 13:10:53 +0200 Subject: [PATCH 03/21] Replace async marker with flag --- src/coreclr/jit/fginline.cpp | 2 +- src/coreclr/jit/gentree.cpp | 8 +++----- src/coreclr/jit/gentree.h | 7 ++++++- src/coreclr/jit/importercalls.cpp | 2 +- src/coreclr/jit/lower.cpp | 2 +- src/coreclr/jit/morph.cpp | 1 - 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index f9e8c4686018d1..4816a52b9beb2e 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1072,7 +1072,7 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, return; } - if (call->gtIsAsyncCall && info.compUsesAsyncContinuation) + if (call->IsAsync() && info.compUsesAsyncContinuation) { // Currently not supported. Could provide a nice perf benefit for // Task -> runtime async thunks if we supported it. 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9e2c1f21ef5e1a..3ed544123ccaa8 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -2282,7 +2282,7 @@ bool GenTreeCall::HasSideEffects(Compiler* compiler, bool ignoreExceptions, bool // bool GenTreeCall::IsAsync() const { - return gtIsAsyncCall; + return (gtCallMoreFlags & GTF_CALL_M_ASYNC) != 0; } //------------------------------------------------------------------------- @@ -8324,7 +8324,6 @@ GenTreeCall* Compiler::gtNewCallNode(gtCallTypes callType, node->gtRetClsHnd = nullptr; node->gtControlExpr = nullptr; node->gtCallMoreFlags = GTF_CALL_M_EMPTY; - node->gtIsAsyncCall = false; INDEBUG(node->gtCallDebugFlags = GTF_CALL_MD_EMPTY); node->gtInlineInfoCount = 0; @@ -9938,9 +9937,8 @@ GenTreeCall* Compiler::gtCloneExprCallHelper(GenTreeCall* tree) copy->gtLateDevirtualizationInfo = tree->gtLateDevirtualizationInfo; - copy->gtIsAsyncCall = tree->gtIsAsyncCall; - copy->gtCallType = tree->gtCallType; - copy->gtReturnType = tree->gtReturnType; + copy->gtCallType = tree->gtCallType; + copy->gtReturnType = tree->gtReturnType; #if FEATURE_MULTIREG_RET copy->gtReturnTypeDesc = tree->gtReturnTypeDesc; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ed4f5331e42eec..8edeb8bf6e189a 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4238,6 +4238,7 @@ enum GenTreeCallFlags : unsigned int GTF_CALL_M_GUARDED_DEVIRT_CHAIN = 0x00080000, // this call is a candidate for chained guarded devirtualization GTF_CALL_M_ALLOC_SIDE_EFFECTS = 0x00100000, // this is a call to an allocator with side effects GTF_CALL_M_SUPPRESS_GC_TRANSITION = 0x00200000, // suppress the GC transition (i.e. during a pinvoke) but a separate GC safe point is required. 
+ GTF_CALL_M_ASYNC = 0x00400000, // this call is a runtime async method call and thus a suspension point GTF_CALL_M_EXPANDED_EARLY = 0x00800000, // the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower GTF_CALL_M_LDVIRTFTN_INTERFACE = 0x01000000, // ldvirtftn on an interface type GTF_CALL_M_CAST_CAN_BE_EXPANDED = 0x02000000, // this cast (helper call) can be expanded if it's profitable. To be removed. @@ -5023,6 +5024,11 @@ struct GenTreeCall final : public GenTree #endif } + void SetIsAsync() + { + gtCallMoreFlags |= GTF_CALL_M_ASYNC; + } + bool IsAsync() const; //--------------------------------------------------------------------------- @@ -5594,7 +5600,6 @@ struct GenTreeCall final : public GenTree var_types gtReturnType : 5; // exact return type uint8_t gtInlineInfoCount; // number of inline candidates for the given call - bool gtIsAsyncCall; CORINFO_CLASS_HANDLE gtRetClsHnd; // The return type handle of the call if it is a struct; always available union diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 5aeff1b0258987..6ac3be8e42e1f9 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -714,7 +714,7 @@ var_types Compiler::impImportCall(OPCODE opcode, if (sig->isAsyncCall()) { - call->AsCall()->gtIsAsyncCall = true; + call->AsCall()->SetIsAsync(); } // Now create the argument list. 
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 314f1580d39609..8810705f30bbe3 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5450,7 +5450,7 @@ GenTree* Lowering::LowerAsyncContinuation(GenTree* asyncCont) { JITDUMP("Marking the call [%06u] before async continuation [%06u] as an async call\n", Compiler::dspTreeID(node), Compiler::dspTreeID(asyncCont)); - node->AsCall()->gtIsAsyncCall = true; + node->AsCall()->SetIsAsync(); } BlockRange().Remove(asyncCont); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b0483d39891f8f..474c8f2b9e9961 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -189,7 +189,6 @@ GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, bool morphAr call->gtCallMoreFlags = GTF_CALL_M_EMPTY; INDEBUG(call->gtCallDebugFlags = GTF_CALL_MD_EMPTY); call->gtControlExpr = nullptr; - call->gtIsAsyncCall = false; call->ClearInlineInfo(); #ifdef UNIX_X86_ABI call->gtFlags |= GTF_CALL_POP_ARGS; From b98ce25dc9197e47795b36776a150967ca7a317b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 13:14:35 +0200 Subject: [PATCH 04/21] Fix a todo --- src/coreclr/jit/patchpoint.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/patchpoint.cpp b/src/coreclr/jit/patchpoint.cpp index b846c94d07b72a..3f11cbf53b3563 100644 --- a/src/coreclr/jit/patchpoint.cpp +++ b/src/coreclr/jit/patchpoint.cpp @@ -238,7 +238,7 @@ class PatchpointTransformer // call PatchpointForced(ilOffset) // GenTree* ilOffsetNode = compiler->gtNewIconNode(ilOffset, TYP_INT); - // TODO: Unify with VM + // TODO-Async: Unify with VM const CorInfoHelpFunc CORINFO_HELP_PATCHPOINT_FORCED = CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT; GenTreeCall* helperCall = compiler->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffsetNode); From 717fbd104ec153a25dd2e906011a693b79c51352 Mon Sep 17 00:00:00 2001 From: Jakob Botsch 
Nielsen Date: Mon, 21 Apr 2025 13:15:33 +0200 Subject: [PATCH 05/21] Undo unnecessary change --- src/coreclr/jit/patchpoint.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/patchpoint.cpp b/src/coreclr/jit/patchpoint.cpp index 3f11cbf53b3563..ab695c0a5c3bb1 100644 --- a/src/coreclr/jit/patchpoint.cpp +++ b/src/coreclr/jit/patchpoint.cpp @@ -235,12 +235,11 @@ class PatchpointTransformer // Add helper call // - // call PatchpointForced(ilOffset) + // call PartialCompilationPatchpointHelper(ilOffset) // GenTree* ilOffsetNode = compiler->gtNewIconNode(ilOffset, TYP_INT); - // TODO-Async: Unify with VM - const CorInfoHelpFunc CORINFO_HELP_PATCHPOINT_FORCED = CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT; - GenTreeCall* helperCall = compiler->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffsetNode); + GenTreeCall* helperCall = + compiler->gtNewHelperCallNode(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, TYP_VOID, ilOffsetNode); compiler->fgNewStmtAtEnd(block, helperCall); } From 65590e54c1b4755a98b761d17ae4452bee5d891d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 13:16:38 +0200 Subject: [PATCH 06/21] Undo some JIT-EE changes --- src/coreclr/jit/ICorJitInfo_names_generated.h | 2 -- .../jit/ICorJitInfo_wrapper_generated.hpp | 16 ---------------- 2 files changed, 18 deletions(-) diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h index ca1fed8c1c6592..94e244c0749bfa 100644 --- a/src/coreclr/jit/ICorJitInfo_names_generated.h +++ b/src/coreclr/jit/ICorJitInfo_names_generated.h @@ -124,7 +124,6 @@ DEF_CLR_API(getHFAType) DEF_CLR_API(runWithErrorTrap) DEF_CLR_API(runWithSPMIErrorTrap) DEF_CLR_API(getEEInfo) -DEF_CLR_API(getAsyncInfo) DEF_CLR_API(getMethodDefFromMethod) DEF_CLR_API(printMethodName) DEF_CLR_API(getMethodNameFromMetadata) @@ -162,7 +161,6 @@ DEF_CLR_API(getFieldThreadLocalStoreID) DEF_CLR_API(GetDelegateCtor) 
DEF_CLR_API(MethodCompileComplete) DEF_CLR_API(getTailCallHelpers) -DEF_CLR_API(getAsyncResumptionStub) DEF_CLR_API(convertPInvokeCalliToCall) DEF_CLR_API(notifyInstructionSetUsage) DEF_CLR_API(updateEntryPointForTailCall) diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp index 131698d8d495f2..9c7e6c1099826d 100644 --- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp +++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp @@ -1181,14 +1181,6 @@ void WrapICorJitInfo::getEEInfo( API_LEAVE(getEEInfo); } -void WrapICorJitInfo::getAsyncInfo( - CORINFO_ASYNC_INFO* pAsyncInfoOut) -{ - API_ENTER(getAsyncInfo); - wrapHnd->getAsyncInfo(pAsyncInfoOut); - API_LEAVE(getAsyncInfo); -} - mdMethodDef WrapICorJitInfo::getMethodDefFromMethod( CORINFO_METHOD_HANDLE hMethod) { @@ -1563,14 +1555,6 @@ bool WrapICorJitInfo::getTailCallHelpers( return temp; } -CORINFO_METHOD_HANDLE WrapICorJitInfo::getAsyncResumptionStub() -{ - API_ENTER(getAsyncResumptionStub); - CORINFO_METHOD_HANDLE temp = wrapHnd->getAsyncResumptionStub(); - API_LEAVE(getAsyncResumptionStub); - return temp; -} - bool WrapICorJitInfo::convertPInvokeCalliToCall( CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert) From 5a67ad05812f9bf9e5399cb12fcf5744a9b0c16d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 13:25:07 +0200 Subject: [PATCH 07/21] Add high level description of the transformation --- src/coreclr/jit/async.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index e1ec8fbabb93c0..2073c393ed45a0 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -228,6 +228,19 @@ void AsyncLiveness::GetLiveLocals(jitstd::vector& liveLocals, uns // Returns: // Suitable phase status. // +// Remarks: +// This transformation creates the state machine structure of the async +// function. 
After each async call a check for whether that async call +// suspended is inserted. If the check passes a continuation is allocated +// into which the live state is stored. The continuation is returned back to +// the caller to indicate that now this function also suspended. +// +// Associated with each suspension point is also resumption IR. The +// resumption IR restores all live state from the continuation object. IR is +// inserted at the beginning of the function to dispatch on the continuation +// (if one is present), with each suspension point having an associated +// state number that can be switched over. +// PhaseStatus Compiler::TransformAsync() { assert(compIsAsync()); @@ -729,9 +742,9 @@ ContinuationLayout AsyncTransformation::LayOutContinuation(BasicBlock* // STORE_LCL_VAR/STORE_LCL_FLD that follows the call node. // // Parameters: -// block - The block containing the async call -// call - The async call -// life - Liveness information about live locals +// block - The block containing the async call +// call  - The async call +// life  - Liveness information about live locals // // Returns: // Information about the definition after canonicalization.
From 237bb4b7bc55bf983bc0360707b03507d8808c56 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 13:30:45 +0200 Subject: [PATCH 08/21] Sort async file correctly --- src/coreclr/jit/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index c0939484ff5701..f8167e8ae94241 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -88,8 +88,8 @@ endif(CLR_CMAKE_TARGET_WIN32) set( JIT_SOURCES abi.cpp alloc.cpp - async.cpp assertionprop.cpp + async.cpp bitset.cpp block.cpp buildstring.cpp @@ -286,8 +286,8 @@ set( JIT_HEADERS _typeinfo.h abi.h alloc.h - async.h arraystack.h + async.h bitset.h layout.h bitsetasshortlong.h From fcba2795ea1aeffca8c9f53136168b17e48eedf8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 13:59:04 +0200 Subject: [PATCH 09/21] Run jit-format --- src/coreclr/jit/async.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 2073c393ed45a0..6336afe495c674 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -296,7 +296,7 @@ PhaseStatus AsyncTransformation::Run() m_comp->lvaGetDesc(m_newContinuationVar)->lvType = TYP_REF; // TODO-Async: Unify with VM - //m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); + // m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); #ifdef JIT32_GCENCODER // Due to a hard cap on epilogs we need a shared return here. 
@@ -1450,11 +1450,11 @@ BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock* GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull); LIR::AsRange(resumeBB).InsertAtEnd(exception, null, neNull, jtrue); - exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); + exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); // TODO-Async: Unify with VM const CorInfoHelpFunc CORINFO_HELP_THROWEXACT = CORINFO_HELP_UNDEF; - GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception); + GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception); m_comp->compCurBB = rethrowExceptionBB; m_comp->fgMorphTree(rethrowException); @@ -1929,7 +1929,7 @@ void AsyncTransformation::CreateResumptionSwitch() GenTree* jtrue = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, geZero); LIR::AsRange(checkILOffsetBB).InsertAtEnd(ilOffset, zero, geZero, jtrue); - ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT); + ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT); // TODO-Async: Unify with VM const CorInfoHelpFunc CORINFO_HELP_PATCHPOINT_FORCED = CORINFO_HELP_UNDEF; From ae82cba1adf6777ab11426585c9b261937d6b27b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 21 Apr 2025 16:34:32 +0200 Subject: [PATCH 10/21] Undo a change --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fginline.cpp | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 430b8600c5c2df..fdf37afa2a6aa6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6860,7 +6860,7 @@ class Compiler void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result, InlineContext** createdContext); void fgInsertInlineeBlocks(InlineInfo* pInlineInfo); - void fgInsertInlineeArgument(InlineInfo* pInlineInfo, const InlArgInfo& argInfo, BasicBlock* block, Statement** 
afterStmt, Statement** newStmt, const DebugInfo& callDI); + void fgInsertInlineeArgument(const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI); Statement* fgInlinePrependStatements(InlineInfo* inlineInfo); void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, Statement* stmt); diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 4816a52b9beb2e..fdaff26c9b93d1 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -2002,12 +2002,8 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // newStmt - updated with the new statement // callDI - debug info for the call // -void Compiler::fgInsertInlineeArgument(InlineInfo* inlineInfo, - const InlArgInfo& argInfo, - BasicBlock* block, - Statement** afterStmt, - Statement** newStmt, - const DebugInfo& callDI) +void Compiler::fgInsertInlineeArgument( + const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI) { const bool argIsSingleDef = !argInfo.argHasLdargaOp && !argInfo.argHasStargOp; CallArg* arg = argInfo.arg; @@ -2253,7 +2249,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) } assert(argInfo != nullptr); - fgInsertInlineeArgument(inlineInfo, *argInfo, block, &afterStmt, &newStmt, callDI); + fgInsertInlineeArgument(*argInfo, block, &afterStmt, &newStmt, callDI); } // Add the CCTOR check if asked for. 
From 0fe94e60c8ab2af077783d66550fbe7815c26902 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 23 Apr 2025 20:00:49 +0200 Subject: [PATCH 11/21] Remove PSPSym reference --- src/coreclr/jit/async.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 6336afe495c674..22624959ec040d 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -92,12 +92,6 @@ bool AsyncLiveness::IsLocalCaptureUnnecessary(unsigned lclNum) return true; } - if (lclNum == m_comp->lvaPSPSym) - { - // Initialized in prolog - return true; - } - if (lclNum == m_comp->info.compLvFrameListRoot) { return true; From f383273f4de0829cd8e3e69cd981f55128a80db2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 23 Apr 2025 20:02:34 +0200 Subject: [PATCH 12/21] Delete old intrinsic --- src/coreclr/jit/importercalls.cpp | 5 ----- src/coreclr/jit/namedintrinsiclist.h | 1 - 2 files changed, 6 deletions(-) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index ea60b15f839c78..a10c7d3771bc84 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -11076,11 +11076,6 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_Runtime_CompilerServices_RuntimeHelpers_AsyncSuspend; } - else if (strcmp(methodName, "get_RuntimeAsyncViaJitGeneratedStateMachines") == 0) - { - result = - NI_System_Runtime_CompilerServices_RuntimeHelpers_get_RuntimeAsyncViaJitGeneratedStateMachines; - } } else if (strcmp(className, "StaticsHelpers") == 0) { diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index cc1c7762ae8477..8804cf515d2aa7 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -123,7 +123,6 @@ enum NamedIntrinsic : unsigned short NI_System_Runtime_CompilerServices_RuntimeHelpers_GetMethodTable, 
NI_System_Runtime_CompilerServices_RuntimeHelpers_Await, NI_System_Runtime_CompilerServices_RuntimeHelpers_AsyncSuspend, - NI_System_Runtime_CompilerServices_RuntimeHelpers_get_RuntimeAsyncViaJitGeneratedStateMachines, NI_System_Runtime_CompilerServices_StaticsHelpers_VolatileReadAsByref, From 96350be611cf40de5661571a5cf17b71aa541207 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 23 Apr 2025 21:09:16 +0200 Subject: [PATCH 13/21] Remove unnecessary CallArgs change --- src/coreclr/jit/gentree.cpp | 8 -------- src/coreclr/jit/gentree.h | 2 -- 2 files changed, 10 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 45708303690fea..8754c2b24df809 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1241,7 +1241,6 @@ CallArgs::CallArgs() #endif , m_hasThisPointer(false) , m_hasRetBuffer(false) - , m_hasAsyncContinuation(false) , m_isVarArgs(false) , m_abiInformationDetermined(false) , m_hasAddedFinalArgs(false) @@ -1520,9 +1519,6 @@ void CallArgs::AddedWellKnownArg(WellKnownArg arg) case WellKnownArg::RetBuffer: m_hasRetBuffer = true; break; - case WellKnownArg::AsyncContinuation: - m_hasAsyncContinuation = true; - break; default: break; } @@ -1546,10 +1542,6 @@ void CallArgs::RemovedWellKnownArg(WellKnownArg arg) assert(FindWellKnownArg(arg) == nullptr); m_hasRetBuffer = false; break; - case WellKnownArg::AsyncContinuation: - assert(FindWellKnownArg(arg) == nullptr); - m_hasAsyncContinuation = false; - break; default: break; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 8edeb8bf6e189a..632751eef64d48 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4743,7 +4743,6 @@ class CallArgs #endif bool m_hasThisPointer : 1; bool m_hasRetBuffer : 1; - bool m_hasAsyncContinuation : 1; bool m_isVarArgs : 1; bool m_abiInformationDetermined : 1; bool m_hasAddedFinalArgs : 1; @@ -4819,7 +4818,6 @@ class CallArgs // clang-format off bool HasThisPointer() 
const { return m_hasThisPointer; } bool HasRetBuffer() const { return m_hasRetBuffer; } - bool HasAsyncContinuation() const { return m_hasAsyncContinuation; } bool IsVarArgs() const { return m_isVarArgs; } void SetIsVarArgs() { m_isVarArgs = true; } void ClearIsVarArgs() { m_isVarArgs = false; } From a383044b2521527511d424c903f834ab56e4405e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Apr 2025 22:24:16 +0200 Subject: [PATCH 14/21] Clean up after merge --- src/coreclr/inc/corjitflags.h | 2 +- src/coreclr/jit/async.cpp | 13 +------------ src/coreclr/jit/compiler.h | 3 +-- src/coreclr/jit/importer.cpp | 2 -- src/coreclr/jit/jitee.h | 3 +++ 5 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/coreclr/inc/corjitflags.h b/src/coreclr/inc/corjitflags.h index 86d9b44a797c9f..d7a8349a81d61b 100644 --- a/src/coreclr/inc/corjitflags.h +++ b/src/coreclr/inc/corjitflags.h @@ -63,7 +63,7 @@ class CORJIT_FLAGS CORJIT_FLAG_RELATIVE_CODE_RELOCS = 29, // JIT should generate PC-relative address computations instead of EE relocation records CORJIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif - CORJIT_FLAG_ASYNC = 31, // Generate Code for use as an async function + CORJIT_FLAG_ASYNC = 31, // Generate code for use as an async function }; CORJIT_FLAGS() diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 22624959ec040d..3301e8b517b780 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -289,8 +289,7 @@ PhaseStatus AsyncTransformation::Run() m_newContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("new continuation")); m_comp->lvaGetDesc(m_newContinuationVar)->lvType = TYP_REF; - // TODO-Async: Unify with VM - // m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); + m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); #ifdef JIT32_GCENCODER // Due to a hard cap on epilogs we need a shared return here. 
@@ -931,22 +930,16 @@ GenTreeCall* AsyncTransformation::CreateAllocContinuationCall(AsyncLiveness& lif if (methodHandleArg != nullptr) { - // TODO-Async: Unify with VM - const CorInfoHelpFunc CORINFO_HELP_ALLOC_CONTINUATION_METHOD = CORINFO_HELP_UNDEF; return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_METHOD, TYP_REF, prevContinuation, gcRefsCountNode, dataSizeNode, methodHandleArg); } if (classHandleArg != nullptr) { - // TODO-Async: Unify with VM - const CorInfoHelpFunc CORINFO_HELP_ALLOC_CONTINUATION_CLASS = CORINFO_HELP_UNDEF; return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_CLASS, TYP_REF, prevContinuation, gcRefsCountNode, dataSizeNode, classHandleArg); } - // TODO-Async: Unify with VM - const CorInfoHelpFunc CORINFO_HELP_ALLOC_CONTINUATION = CORINFO_HELP_UNDEF; return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION, TYP_REF, prevContinuation, gcRefsCountNode, dataSizeNode); } @@ -1446,8 +1439,6 @@ BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock* exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); - // TODO-Async: Unify with VM - const CorInfoHelpFunc CORINFO_HELP_THROWEXACT = CORINFO_HELP_UNDEF; GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception); m_comp->compCurBB = rethrowExceptionBB; @@ -1925,8 +1916,6 @@ void AsyncTransformation::CreateResumptionSwitch() ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT); - // TODO-Async: Unify with VM - const CorInfoHelpFunc CORINFO_HELP_PATCHPOINT_FORCED = CORINFO_HELP_UNDEF; GenTreeCall* callHelper = m_comp->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffset); callHelper->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d660d8a5d905ae..a1fad2ce2b59d7 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10874,8 +10874,7 @@ class Compiler bool compIsAsync() const 
{ - // TODO-Async: Unify with VM and introduce JIT_FLAG_ASYNC/CORJIT_FLAG_ASYNC - return false; + return opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ASYNC); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 67c207e57753cb..e1ea136c7152e4 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -9079,8 +9079,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (isAwait) { - // TODO-Async: Unify with VM - const CorInfoTokenKind CORINFO_TOKENKIND_Await = (CorInfoTokenKind)0; _impResolveToken(CORINFO_TOKENKIND_Await); if (resolvedToken.hMethod != NULL) { diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index 5a4de5a5635fe8..c8f0e64f1f4a97 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -43,6 +43,8 @@ class JitFlags JIT_FLAG_RELATIVE_CODE_RELOCS = 29, // JIT should generate PC-relative address computations instead of EE relocation records JIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif + + JIT_FLAG_ASYNC = 31, // Generate code for use as an async function // Note: the mcs tool uses the currently unused upper flags bits when outputting SuperPMI MC file flags. // See EXTRA_JIT_FLAGS and spmidumphelper.cpp. Currently, these are bits 56 through 63. If they overlap, // something needs to change. 
@@ -140,6 +142,7 @@ class JitFlags FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_RELATIVE_CODE_RELOCS, JIT_FLAG_RELATIVE_CODE_RELOCS); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SOFTFP_ABI, JIT_FLAG_SOFTFP_ABI); #endif // TARGET_ARM + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_ASYNC, JIT_FLAG_ASYNC); #undef FLAGS_EQUAL } From ce2c4f16c72d5504af8edfc4dcb6e22d6d4daa09 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Apr 2025 22:27:34 +0200 Subject: [PATCH 15/21] Clarify IL offset stored --- src/coreclr/jit/async.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 3301e8b517b780..67851d3c25652a 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -638,11 +638,11 @@ ContinuationLayout AsyncTransformation::LayOutContinuation(BasicBlock* return lhs.Alignment > rhs.Alignment; }); - // For OSR, we store the transition IL offset at the beginning of the data - // (-1 in the tier0 version): + // For OSR, we store the IL offset that inspired the OSR method at the + // beginning of the data (-1 in the tier0 version): if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) { - JITDUMP(" Method %s; keeping an IL offset at the beginning of non-GC data\n", + JITDUMP(" Method %s; keeping IL offset that inspired OSR method at the beginning of non-GC data\n", m_comp->doesMethodHavePatchpoints() ? 
"has patchpoints" : "is an OSR method"); layout.DataSize += sizeof(int); } From a94387ec8ea7d6db0411318c5e4871676ee05cfb Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Apr 2025 22:33:32 +0200 Subject: [PATCH 16/21] Address feedback --- src/coreclr/jit/async.cpp | 2 ++ src/coreclr/jit/block.h | 2 +- src/coreclr/jit/lower.cpp | 23 +++++++++++++++++------ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 67851d3c25652a..0dff2300fbce8c 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -1201,6 +1201,8 @@ BasicBlock* AsyncTransformation::CreateResumption(BasicBlock* bloc JITDUMP(" Creating resumption " FMT_BB " for state %u\n", resumeBB->bbNum, stateNum); + // We need to restore data before we restore GC pointers: restoring the + // data can also write the GC pointer fields (with nulls). unsigned resumeByteArrLclNum = BAD_VAR_NUM; if (layout.DataSize > 0) { diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 83754de7d7bef0..c2e922ae47cce0 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -481,7 +481,7 @@ enum BasicBlockFlags : uint64_t // For example, the top block might or might not have BBF_GC_SAFE_POINT, // but we assume it does not have BBF_GC_SAFE_POINT any more. - BBF_SPLIT_LOST = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_RECURSIVE_TAILCALL | BBF_ASYNC_RESUMPTION, + BBF_SPLIT_LOST = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_RECURSIVE_TAILCALL, // Flags gained by the bottom block when a block is split. // Note, this is a conservative guess. 
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 9514784db175e4..61de442555b8fc 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5454,12 +5454,23 @@ GenTree* Lowering::LowerAsyncContinuation(GenTree* asyncCont) GenTree* next = asyncCont->gtNext; - // When the ASYNC_CONTINUATION was created as a result of the - // AsyncCallContinuation() intrinsic the previous call hasn't been marked - // as an async call. We need to do that to get the right GC reporting - // behavior for the returned async continuation. Furthermore, we ensure the - // async continuation follows the call to simplify marking the registers - // busy in LSRA. + // + // ASYNC_CONTINUATION is created from two sources: + // + // 1. The async resumption stubs are IL stubs created by the VM. These call + // runtime async functions via "calli", passing the continuation manually. + // They use the AsyncHelpers.AsyncCallContinuation intrinsic after the + // calli, which turns into the ASYNC_CONTINUATION node during import. + // + // 2. In the async transformation, ASYNC_CONTINUATION nodes are inserted + // after calls to async calls. + // + // In the former case nothing has marked the previous call as an "async" + // method. We need to do that here to ensure that the backend knows that + // the call has a non-standard calling convention that returns an + // additional GC ref. This requires additional GC tracking that we would + // otherwise not get. 
+ // GenTree* node = asyncCont; while (true) { From f5479b0f671926e9a730e2c7a1ac4334dca1ce57 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Apr 2025 22:39:16 +0200 Subject: [PATCH 17/21] More clean up --- .../System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs | 5 +++-- src/coreclr/inc/corinfo.h | 5 +++-- src/coreclr/jit/async.cpp | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs index b2043e975345f1..98e2c4493f1e03 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs @@ -57,8 +57,9 @@ internal enum CorInfoContinuationFlags // should be placed at index 0 or 1 depending on whether the continuation // also expects a result. CORINFO_CONTINUATION_NEEDS_EXCEPTION = 2, - // If this bit is set the continuation has an OSR IL offset saved in the - // beginning of 'Data'. + // If this bit is set the continuation has the IL offset that inspired the + // OSR method saved in the beginning of 'Data', or -1 if the continuation + // belongs to a tier 0 method. CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA = 4, } diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 02d108c853ebb1..a63cf8a5a0cf68 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1708,8 +1708,9 @@ enum CorInfoContinuationFlags // should be placed at index 0 or 1 depending on whether the continuation // also expects a result. CORINFO_CONTINUATION_NEEDS_EXCEPTION = 2, - // If this bit is set the continuation has an OSR IL offset saved in the - // beginning of 'Data'. 
+ // If this bit is set the continuation has the IL offset that inspired the + // OSR method saved in the beginning of 'Data', or -1 if the continuation + // belongs to a tier 0 method. CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA = 4, }; diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 0dff2300fbce8c..01877e918000c6 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -1441,7 +1441,7 @@ BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock* exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF); - GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception); + GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception); m_comp->compCurBB = rethrowExceptionBB; m_comp->fgMorphTree(rethrowException); From 6f1f369fdfeb7a5a0019c86a8d3a74f943222c37 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Apr 2025 22:47:07 +0200 Subject: [PATCH 18/21] Reword comment --- src/coreclr/jit/async.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 01877e918000c6..24e02e752f7354 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -1201,8 +1201,8 @@ BasicBlock* AsyncTransformation::CreateResumption(BasicBlock* bloc JITDUMP(" Creating resumption " FMT_BB " for state %u\n", resumeBB->bbNum, stateNum); - // We need to restore data before we restore GC pointers: restoring the - // data can also write the GC pointer fields (with nulls). + // We need to restore data before we restore GC pointers, since restoring + // the data may also write the GC pointer fields with nulls. 
unsigned resumeByteArrLclNum = BAD_VAR_NUM; if (layout.DataSize > 0) { From 273e93623946036949cdcb45a54af63d4afe90bd Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Apr 2025 22:51:50 +0200 Subject: [PATCH 19/21] Remove another TODO --- src/coreclr/jit/async.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 24e02e752f7354..d4818b87074411 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -280,8 +280,7 @@ PhaseStatus AsyncTransformation::Run() // Ask the VM to create a resumption stub for this specific version of the // code. It is stored in the continuation as a function pointer, so we need // the fixed entry point here. - // TODO: Get once VM changes are merged - // m_resumeStub = m_comp->info.compCompHnd->getAsyncResumptionStub(); + m_resumeStub = m_comp->info.compCompHnd->getAsyncResumptionStub(); m_comp->info.compCompHnd->getFunctionFixedEntryPoint(m_resumeStub, false, &m_resumeStubLookup); m_returnedContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("returned continuation")); From 414e2c86f404a19f9fbcd5bd4c1f3de5b5e9424f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Apr 2025 22:56:07 +0200 Subject: [PATCH 20/21] Remove unused function --- src/coreclr/jit/gentree.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 632751eef64d48..e57d334a2805b5 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4790,7 +4790,6 @@ class CallArgs CallArg* InsertAfter(Compiler* comp, CallArg* after, const NewCallArg& arg); CallArg* InsertAfterUnchecked(Compiler* comp, CallArg* after, const NewCallArg& arg); CallArg* InsertInstParam(Compiler* comp, GenTree* node); - CallArg* InsertAsyncContinuationParam(Compiler* comp, GenTree* node); CallArg* InsertAfterThisOrFirst(Compiler* comp, const NewCallArg& arg); void PushLateBack(CallArg* arg); void Remove(CallArg* 
arg); From f292350ae07f29a4928799e4027a8d8c45be7a6a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Apr 2025 23:00:05 +0200 Subject: [PATCH 21/21] Undo a change --- src/coreclr/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e1ea136c7152e4..4585033f8e2f70 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -13966,7 +13966,7 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l // if it is a struct, because it requires some additional handling. if ((!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef && - !argInfo.argHasCallerLocalRef && !argInfo.argIsByRefToStructLocal)) + !argInfo.argHasCallerLocalRef)) { /* Get a *LARGE* LCL_VAR node */ op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp));