diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs index b2043e975345f1..98e2c4493f1e03 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs @@ -57,8 +57,9 @@ internal enum CorInfoContinuationFlags // should be placed at index 0 or 1 depending on whether the continuation // also expects a result. CORINFO_CONTINUATION_NEEDS_EXCEPTION = 2, - // If this bit is set the continuation has an OSR IL offset saved in the - // beginning of 'Data'. + // If this bit is set the continuation has the IL offset that inspired the + // OSR method saved in the beginning of 'Data', or -1 if the continuation + // belongs to a tier 0 method. CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA = 4, } diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 02d108c853ebb1..a63cf8a5a0cf68 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1708,8 +1708,9 @@ enum CorInfoContinuationFlags // should be placed at index 0 or 1 depending on whether the continuation // also expects a result. CORINFO_CONTINUATION_NEEDS_EXCEPTION = 2, - // If this bit is set the continuation has an OSR IL offset saved in the - // beginning of 'Data'. + // If this bit is set the continuation has the IL offset that inspired the + // OSR method saved in the beginning of 'Data', or -1 if the continuation + // belongs to a tier 0 method. 
CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA = 4, }; diff --git a/src/coreclr/inc/corjitflags.h b/src/coreclr/inc/corjitflags.h index 86d9b44a797c9f..d7a8349a81d61b 100644 --- a/src/coreclr/inc/corjitflags.h +++ b/src/coreclr/inc/corjitflags.h @@ -63,7 +63,7 @@ class CORJIT_FLAGS CORJIT_FLAG_RELATIVE_CODE_RELOCS = 29, // JIT should generate PC-relative address computations instead of EE relocation records CORJIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif - CORJIT_FLAG_ASYNC = 31, // Generate Code for use as an async function + CORJIT_FLAG_ASYNC = 31, // Generate code for use as an async function }; CORJIT_FLAGS() diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 288edf637a6dd4..f8167e8ae94241 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -89,6 +89,7 @@ set( JIT_SOURCES abi.cpp alloc.cpp assertionprop.cpp + async.cpp bitset.cpp block.cpp buildstring.cpp @@ -286,6 +287,7 @@ set( JIT_HEADERS abi.h alloc.h arraystack.h + async.h bitset.h layout.h bitsetasshortlong.h diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp new file mode 100644 index 00000000000000..d4818b87074411 --- /dev/null +++ b/src/coreclr/jit/async.cpp @@ -0,0 +1,1967 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "jitpch.h" +#include "jitstd/algorithm.h" +#include "async.h" + +class AsyncLiveness +{ + Compiler* m_comp; + bool m_hasLiveness; + TreeLifeUpdater m_updater; + unsigned m_numVars; + +public: + AsyncLiveness(Compiler* comp, bool hasLiveness) + : m_comp(comp) + , m_hasLiveness(hasLiveness) + , m_updater(comp) + , m_numVars(comp->lvaCount) + { + } + + void StartBlock(BasicBlock* block); + void Update(GenTree* node); + bool IsLive(unsigned lclNum); + void GetLiveLocals(jitstd::vector& liveLocals, unsigned fullyDefinedRetBufLcl); + +private: + bool IsLocalCaptureUnnecessary(unsigned lclNum); +}; + +//------------------------------------------------------------------------ +// AsyncLiveness::StartBlock: +// Indicate that we are now starting a new block, and do relevant liveness +// updates for it. +// +// Parameters: +// block - The block that we are starting. +// +void AsyncLiveness::StartBlock(BasicBlock* block) +{ + if (!m_hasLiveness) + return; + + VarSetOps::Assign(m_comp, m_comp->compCurLife, block->bbLiveIn); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::Update: +// Update liveness to be consistent with the specified node having been +// executed. +// +// Parameters: +// node - The node. +// +void AsyncLiveness::Update(GenTree* node) +{ + if (!m_hasLiveness) + return; + + m_updater.UpdateLife(node); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::IsLocalCaptureUnnecessary: +// Check if capturing a specified local can be skipped. +// +// Parameters: +// lclNum - The local +// +// Returns: +// True if the local should not be captured. 
Even without liveness +// +bool AsyncLiveness::IsLocalCaptureUnnecessary(unsigned lclNum) +{ +#if FEATURE_FIXED_OUT_ARGS + if (lclNum == m_comp->lvaOutgoingArgSpaceVar) + { + return true; + } +#endif + + if (lclNum == m_comp->info.compRetBuffArg) + { + return true; + } + + if (lclNum == m_comp->lvaGSSecurityCookie) + { + // Initialized in prolog + return true; + } + + if (lclNum == m_comp->info.compLvFrameListRoot) + { + return true; + } + + if (lclNum == m_comp->lvaInlinedPInvokeFrameVar) + { + return true; + } + +#ifdef FEATURE_EH_WINDOWS_X86 + if (lclNum == m_comp->lvaShadowSPslotsVar) + { + // Only expected to be live in handlers + return true; + } +#endif + + if (lclNum == m_comp->lvaRetAddrVar) + { + return true; + } + + if (lclNum == m_comp->lvaAsyncContinuationArg) + { + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// AsyncLiveness::IsLive: +// Check if the specified local is live at this point and should be captured. +// +// Parameters: +// lclNum - The local +// +// Returns: +// True if the local is live and capturing it is necessary. +// +bool AsyncLiveness::IsLive(unsigned lclNum) +{ + if (IsLocalCaptureUnnecessary(lclNum)) + { + return false; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(lclNum); + + if (((dsc->TypeGet() == TYP_BYREF) && !dsc->IsImplicitByRef()) || + ((dsc->TypeGet() == TYP_STRUCT) && dsc->GetLayout()->HasGCByRef())) + { + // Even if these are address exposed we expect them to be dead at + // suspension points. TODO: It would be good to somehow verify these + // aren't obviously live, if the JIT creates live ranges that span a + // suspension point then this makes it quite hard to diagnose that. 
+ return false; + } + + if (!m_hasLiveness) + { + return true; + } + + if (dsc->lvRefCnt(RCS_NORMAL) == 0) + { + return false; + } + + Compiler::lvaPromotionType promoType = m_comp->lvaGetPromotionType(dsc); + if (promoType == Compiler::PROMOTION_TYPE_INDEPENDENT) + { + // Independently promoted structs are handled only through their + // fields. + return false; + } + + if (promoType == Compiler::PROMOTION_TYPE_DEPENDENT) + { + // Dependently promoted structs are handled only through the base + // struct local. + // + // A dependently promoted struct is live if any of its fields are live. + + for (unsigned i = 0; i < dsc->lvFieldCnt; i++) + { + LclVarDsc* fieldDsc = m_comp->lvaGetDesc(dsc->lvFieldLclStart + i); + if (!fieldDsc->lvTracked || VarSetOps::IsMember(m_comp, m_comp->compCurLife, fieldDsc->lvVarIndex)) + { + return true; + } + } + + return false; + } + + if (dsc->lvIsStructField && (m_comp->lvaGetParentPromotionType(dsc) == Compiler::PROMOTION_TYPE_DEPENDENT)) + { + return false; + } + + return !dsc->lvTracked || VarSetOps::IsMember(m_comp, m_comp->compCurLife, dsc->lvVarIndex); +} + +//------------------------------------------------------------------------ +// AsyncLiveness::GetLiveLocals: +// Get live locals that should be captured at this point. +// +// Parameters: +// liveLocals - Vector to add live local information into +// fullyDefinedRetBufLcl - Local to skip even if live +// +void AsyncLiveness::GetLiveLocals(jitstd::vector& liveLocals, unsigned fullyDefinedRetBufLcl) +{ + for (unsigned lclNum = 0; lclNum < m_numVars; lclNum++) + { + if ((lclNum != fullyDefinedRetBufLcl) && IsLive(lclNum)) + { + liveLocals.push_back(LiveLocalInfo(lclNum)); + } + } +} + +//------------------------------------------------------------------------ +// TransformAsync: Run async transformation. +// +// Returns: +// Suitable phase status. +// +// Remarks: +// This transformation creates the state machine structure of the async +// function. 
After each async call a check for whether that async call +// suspended is inserted. If the check passes a continuation is allocated +// into which the live state is stored. The continuation is returned back to +// the caller to indicate that now this function also suspended. +// +// Associated with each suspension point is also resumption IR. The +// resumption IR restores all live state from the continuation object. IR is +// inserted at the beginning of the function to dispatch on the continuation +// (if one is present), with each suspension point having an associated +// state number that can be switched over. +// +PhaseStatus Compiler::TransformAsync() +{ + assert(compIsAsync()); + + AsyncTransformation transformation(this); + return transformation.Run(); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::Run: +// Run the transformation over all the IR. +// +// Returns: +// Suitable phase status. +// +PhaseStatus AsyncTransformation::Run() +{ + ArrayStack worklist(m_comp->getAllocator(CMK_Async)); + + // First find all basic blocks with awaits in them. We'll have to track + // liveness in these basic blocks, so it does not help to record the calls + // ahead of time. + for (BasicBlock* block : m_comp->Blocks()) + { + for (GenTree* tree : LIR::AsRange(block)) + { + if (tree->IsCall() && tree->AsCall()->IsAsync() && !tree->AsCall()->IsTailCall()) + { + JITDUMP(FMT_BB " contains await(s)\n", block->bbNum); + worklist.Push(block); + break; + } + } + } + + JITDUMP("Found %d blocks with awaits\n", worklist.Height()); + + if (worklist.Height() <= 0) + { + return PhaseStatus::MODIFIED_NOTHING; + } + + // Ask the VM to create a resumption stub for this specific version of the + // code. It is stored in the continuation as a function pointer, so we need + // the fixed entry point here. 
+ m_resumeStub = m_comp->info.compCompHnd->getAsyncResumptionStub(); + m_comp->info.compCompHnd->getFunctionFixedEntryPoint(m_resumeStub, false, &m_resumeStubLookup); + + m_returnedContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("returned continuation")); + m_comp->lvaGetDesc(m_returnedContinuationVar)->lvType = TYP_REF; + m_newContinuationVar = m_comp->lvaGrabTemp(false DEBUGARG("new continuation")); + m_comp->lvaGetDesc(m_newContinuationVar)->lvType = TYP_REF; + + m_comp->info.compCompHnd->getAsyncInfo(&m_asyncInfo); + +#ifdef JIT32_GCENCODER + // Due to a hard cap on epilogs we need a shared return here. + m_sharedReturnBB = m_comp->fgNewBBafter(BBJ_RETURN, m_comp->fgLastBBInMainFunction(), false); + m_sharedReturnBB->bbSetRunRarely(); + m_sharedReturnBB->clearTryIndex(); + m_sharedReturnBB->clearHndIndex(); + + if (m_comp->fgIsUsingProfileWeights()) + { + // All suspension BBs are cold, so we do not need to propagate any + // weights, but we do need to propagate the flag. + m_sharedReturnBB->SetFlags(BBF_PROF_WEIGHT); + } + + GenTree* continuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + GenTree* ret = m_comp->gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, continuation); + LIR::AsRange(m_sharedReturnBB).InsertAtEnd(continuation, ret); + + JITDUMP("Created shared return BB " FMT_BB "\n", m_sharedReturnBB->bbNum); + + DISPRANGE(LIR::AsRange(m_sharedReturnBB)); +#endif + + // Compute liveness to be used for determining what must be captured on + // suspension. In unoptimized codegen we capture everything. + if (m_comp->opts.OptimizationEnabled()) + { + if (m_comp->m_dfsTree == nullptr) + { + m_comp->m_dfsTree = m_comp->fgComputeDfs(); + } + + m_comp->lvaComputeRefCounts(true, false); + m_comp->fgLocalVarLiveness(); + VarSetOps::AssignNoCopy(m_comp, m_comp->compCurLife, VarSetOps::MakeEmpty(m_comp)); + } + + AsyncLiveness liveness(m_comp, m_comp->opts.OptimizationEnabled()); + + // Now walk the IR for all the blocks that contain async calls. 
Keep track + // of liveness and outstanding LIR edges as we go; the LIR edges that cross + // async calls are additional live variables that must be spilled. + jitstd::vector defs(m_comp->getAllocator(CMK_Async)); + + for (int i = 0; i < worklist.Height(); i++) + { + assert(defs.size() == 0); + + BasicBlock* block = worklist.Bottom(i); + liveness.StartBlock(block); + + bool any; + do + { + any = false; + for (GenTree* tree : LIR::AsRange(block)) + { + // Remove all consumed defs; those are no longer 'live' LIR + // edges. + tree->VisitOperands([&defs](GenTree* op) { + if (op->IsValue()) + { + for (size_t i = defs.size(); i > 0; i--) + { + if (op == defs[i - 1]) + { + defs[i - 1] = defs[defs.size() - 1]; + defs.erase(defs.begin() + (defs.size() - 1), defs.end()); + break; + } + } + } + + return GenTree::VisitResult::Continue; + }); + + // Update liveness to reflect state after this node. + liveness.Update(tree); + + if (tree->IsCall() && tree->AsCall()->IsAsync() && !tree->AsCall()->IsTailCall()) + { + // Transform call; continue with the remainder block + Transform(block, tree->AsCall(), defs, liveness, &block); + defs.clear(); + any = true; + break; + } + + // Push a new definition if necessary; this defined value is + // now a live LIR edge. + if (tree->IsValue() && !tree->IsUnusedValue()) + { + defs.push_back(tree); + } + } + } while (any); + } + + // After transforming all async calls we have created resumption blocks; + // create the resumption switch. + CreateResumptionSwitch(); + + m_comp->fgInvalidateDfsTree(); + + return PhaseStatus::MODIFIED_EVERYTHING; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::Transform: +// Transform a single async call in the specified block. 
+// +// Parameters: +// block - The block containing the async call +// call - The async call +// defs - Current live LIR edges +// life - Liveness information about live locals +// remainder - [out] Remainder block after the transformation +// +void AsyncTransformation::Transform( + BasicBlock* block, GenTreeCall* call, jitstd::vector& defs, AsyncLiveness& life, BasicBlock** remainder) +{ +#ifdef DEBUG + if (m_comp->verbose) + { + printf("Processing call [%06u] in " FMT_BB "\n", Compiler::dspTreeID(call), block->bbNum); + printf(" %zu live LIR edges\n", defs.size()); + + if (defs.size() > 0) + { + const char* sep = " "; + for (GenTree* tree : defs) + { + printf("%s[%06u] (%s)", sep, Compiler::dspTreeID(tree), varTypeName(tree->TypeGet())); + sep = ", "; + } + + printf("\n"); + } + } +#endif + + m_liveLocalsScratch.clear(); + jitstd::vector& liveLocals = m_liveLocalsScratch; + + CreateLiveSetForSuspension(block, call, defs, life, liveLocals); + + ContinuationLayout layout = LayOutContinuation(block, call, liveLocals); + + CallDefinitionInfo callDefInfo = CanonicalizeCallDefinition(block, call, life); + + unsigned stateNum = (unsigned)m_resumptionBBs.size(); + JITDUMP(" Assigned state %u\n", stateNum); + + BasicBlock* suspendBB = CreateSuspension(block, stateNum, life, layout); + + CreateCheckAndSuspendAfterCall(block, callDefInfo, life, suspendBB, remainder); + + BasicBlock* resumeBB = CreateResumption(block, *remainder, call, callDefInfo, stateNum, layout); + + m_resumptionBBs.push_back(resumeBB); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateLiveSetForSuspension: +// Create the set of live state to be captured for suspension, for the +// specified call. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// defs - Current live LIR edges +// life - Liveness information about live locals +// liveLocals - Information about each live local. 
+// +void AsyncTransformation::CreateLiveSetForSuspension(BasicBlock* block, + GenTreeCall* call, + const jitstd::vector& defs, + AsyncLiveness& life, + jitstd::vector& liveLocals) +{ + unsigned fullyDefinedRetBufLcl = BAD_VAR_NUM; + CallArg* retbufArg = call->gtArgs.GetRetBufferArg(); + if (retbufArg != nullptr) + { + GenTree* retbuf = retbufArg->GetNode(); + if (retbuf->IsLclVarAddr()) + { + LclVarDsc* dsc = m_comp->lvaGetDesc(retbuf->AsLclVarCommon()); + ClassLayout* defLayout = m_comp->typGetObjLayout(call->gtRetClsHnd); + if (defLayout->GetSize() == dsc->lvExactSize()) + { + // This call fully defines this retbuf. There is no need to + // consider it live across the call since it is going to be + // overridden anyway. + fullyDefinedRetBufLcl = retbuf->AsLclVarCommon()->GetLclNum(); + JITDUMP(" V%02u is a fully defined retbuf and will not be considered live\n", fullyDefinedRetBufLcl); + } + } + } + + life.GetLiveLocals(liveLocals, fullyDefinedRetBufLcl); + LiftLIREdges(block, defs, liveLocals); + +#ifdef DEBUG + if (m_comp->verbose) + { + printf(" %zu live locals\n", liveLocals.size()); + + if (liveLocals.size() > 0) + { + const char* sep = " "; + for (LiveLocalInfo& inf : liveLocals) + { + printf("%sV%02u (%s)", sep, inf.LclNum, varTypeName(m_comp->lvaGetDesc(inf.LclNum)->TypeGet())); + sep = ", "; + } + + printf("\n"); + } + } +#endif +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LiftLIREdges: +// Create locals capturing outstanding LIR edges and add information +// indicating that these locals are live. 
+// +// Parameters: +// block - The block containing the definitions of the LIR edges +// defs - Current outstanding LIR edges +// liveLocals - [out] Vector to add new live local information into +// +void AsyncTransformation::LiftLIREdges(BasicBlock* block, + const jitstd::vector& defs, + jitstd::vector& liveLocals) +{ + if (defs.size() <= 0) + { + return; + } + + for (GenTree* tree : defs) + { + // TODO-CQ: Enable this. It currently breaks our recognition of how the + // call is stored. + // if (tree->OperIs(GT_LCL_VAR)) + //{ + // LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + // if (!dsc->IsAddressExposed()) + // { + // // No interference by IR invariants. + // LIR::AsRange(block).Remove(tree); + // LIR::AsRange(block).InsertAfter(beyond, tree); + // continue; + // } + //} + + LIR::Use use; + bool gotUse = LIR::AsRange(block).TryGetUse(tree, &use); + assert(gotUse); // Defs list should not contain unused values. + + unsigned newLclNum = use.ReplaceWithLclVar(m_comp); + liveLocals.push_back(LiveLocalInfo(newLclNum)); + GenTree* newUse = use.Def(); + LIR::AsRange(block).Remove(newUse); + LIR::AsRange(block).InsertBefore(use.User(), newUse); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LayOutContinuation: +// Create the layout of the GC pointer and data arrays in the continuation +// object. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// liveLocals - [in, out] Information about each live local. Size/alignment +// information is read and offset/index information is written. +// +// Returns: +// Layout information. 
+// +ContinuationLayout AsyncTransformation::LayOutContinuation(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& liveLocals) +{ + ContinuationLayout layout(liveLocals); + + for (LiveLocalInfo& inf : liveLocals) + { + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + if ((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()) + { + ClassLayout* layout = dsc->GetLayout(); + assert(!layout->HasGCByRef()); + + if (layout->IsCustomLayout()) + { + inf.Alignment = 1; + inf.DataSize = layout->GetSize(); + inf.GCDataCount = layout->GetGCPtrCount(); + } + else + { + inf.Alignment = m_comp->info.compCompHnd->getClassAlignmentRequirement(layout->GetClassHandle()); + if ((layout->GetGCPtrCount() * TARGET_POINTER_SIZE) == layout->GetSize()) + { + inf.DataSize = 0; + } + else + { + inf.DataSize = layout->GetSize(); + } + + inf.GCDataCount = layout->GetGCPtrCount(); + } + } + else if (dsc->TypeGet() == TYP_REF) + { + inf.Alignment = TARGET_POINTER_SIZE; + inf.DataSize = 0; + inf.GCDataCount = 1; + } + else + { + assert(dsc->TypeGet() != TYP_BYREF); + + inf.Alignment = genTypeAlignments[dsc->TypeGet()]; + inf.DataSize = genTypeSize(dsc); + inf.GCDataCount = 0; + } + } + + jitstd::sort(liveLocals.begin(), liveLocals.end(), [](const LiveLocalInfo& lhs, const LiveLocalInfo& rhs) { + if (lhs.Alignment == rhs.Alignment) + { + // Prefer lowest local num first for same alignment. + return lhs.LclNum < rhs.LclNum; + } + + // Otherwise prefer highest alignment first. + return lhs.Alignment > rhs.Alignment; + }); + + // For OSR, we store the IL offset that inspired the OSR method at the + // beginning of the data (-1 in the tier0 version): + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + { + JITDUMP(" Method %s; keeping IL offset that inspired OSR method at the beginning of non-GC data\n", + m_comp->doesMethodHavePatchpoints() ? 
"has patchpoints" : "is an OSR method"); + layout.DataSize += sizeof(int); + } + + if (call->gtReturnType == TYP_STRUCT) + { + layout.ReturnStructLayout = m_comp->typGetObjLayout(call->gtRetClsHnd); + layout.ReturnSize = layout.ReturnStructLayout->GetSize(); + layout.ReturnInGCData = layout.ReturnStructLayout->HasGCPtr(); + } + else + { + layout.ReturnSize = genTypeSize(call->gtReturnType); + layout.ReturnInGCData = varTypeIsGC(call->gtReturnType); + } + + assert((layout.ReturnSize > 0) == (call->gtReturnType != TYP_VOID)); + + // The return value is always stored: + // 1. At index 0 in GCData if it is a TYP_REF or a struct with GC references + // 2. At index 0 in Data, for non OSR methods without GC ref returns + // 3. At index 4 in Data for OSR methods without GC ref returns. The + // continuation flags indicates this scenario with a flag. + if (layout.ReturnInGCData) + { + layout.GCRefsCount++; + } + else if (layout.ReturnSize > 0) + { + layout.ReturnValDataOffset = layout.DataSize; + layout.DataSize += layout.ReturnSize; + } + +#ifdef DEBUG + if (layout.ReturnSize > 0) + { + JITDUMP(" Will store return of type %s, size %u in", + call->gtReturnType == TYP_STRUCT ? 
layout.ReturnStructLayout->GetClassName() + : varTypeName(call->gtReturnType), + layout.ReturnSize); + + if (layout.ReturnInGCData) + { + JITDUMP(" GC data\n"); + } + else + { + JITDUMP(" non-GC data at offset %u\n", layout.ReturnValDataOffset); + } + } +#endif + + if (block->hasTryIndex()) + { + layout.ExceptionGCDataIndex = layout.GCRefsCount++; + JITDUMP(" " FMT_BB " is in try region %u; exception will be at GC@+%02u in GC data\n", block->bbNum, + block->getTryIndex(), layout.ExceptionGCDataIndex); + } + + for (LiveLocalInfo& inf : liveLocals) + { + layout.DataSize = roundUp(layout.DataSize, inf.Alignment); + + inf.DataOffset = layout.DataSize; + inf.GCDataIndex = layout.GCRefsCount; + + layout.DataSize += inf.DataSize; + layout.GCRefsCount += inf.GCDataCount; + } + +#ifdef DEBUG + if (m_comp->verbose) + { + printf(" Continuation layout (%u bytes, %u GC pointers):\n", layout.DataSize, layout.GCRefsCount); + for (LiveLocalInfo& inf : liveLocals) + { + printf(" +%03u (GC@+%02u) V%02u: %u bytes, %u GC pointers\n", inf.DataOffset, inf.GCDataIndex, + inf.LclNum, inf.DataSize, inf.GCDataCount); + } + } +#endif + + return layout; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CanonicalizeCallDefinition: +// Put the call definition in a canonical form. This ensures that either the +// value is defined by a LCL_ADDR retbuffer or by a +// STORE_LCL_VAR/STORE_LCL_FLD that follows the call node. +// +// Parameters: +// block - The block containing the async call +// call - The async call +// life - Liveness information about live locals +// +// Returns: +// Information about the definition after canonicalization. 
+// +CallDefinitionInfo AsyncTransformation::CanonicalizeCallDefinition(BasicBlock* block, + GenTreeCall* call, + AsyncLiveness& life) +{ + CallDefinitionInfo callDefInfo; + + callDefInfo.InsertAfter = call; + + CallArg* retbufArg = call->gtArgs.GetRetBufferArg(); + + if (!call->TypeIs(TYP_VOID) && !call->IsUnusedValue()) + { + assert(retbufArg == nullptr); + assert(call->gtNext != nullptr); + if (!call->gtNext->OperIsLocalStore() || (call->gtNext->Data() != call)) + { + LIR::Use use; + bool gotUse = LIR::AsRange(block).TryGetUse(call, &use); + assert(gotUse); + + use.ReplaceWithLclVar(m_comp); + } + else + { + // We will split after the store, but we still have to update liveness for it. + life.Update(call->gtNext); + } + + assert(call->gtNext->OperIsLocalStore() && (call->gtNext->Data() == call)); + callDefInfo.DefinitionNode = call->gtNext->AsLclVarCommon(); + callDefInfo.InsertAfter = call->gtNext; + } + + if (retbufArg != nullptr) + { + assert(call->TypeIs(TYP_VOID)); + + // For async methods we always expect retbufs to point to locals. We + // ensure this in impStoreStruct. + noway_assert(retbufArg->GetNode()->OperIs(GT_LCL_ADDR)); + + callDefInfo.DefinitionNode = retbufArg->GetNode()->AsLclVarCommon(); + } + + return callDefInfo; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateSuspension: +// Create the basic block that when branched to suspends execution after the +// specified async call. +// +// Parameters: +// block - The block containing the async call +// stateNum - State number assigned to this suspension point +// life - Liveness information about live locals +// layout - Layout information for the continuation object +// +// Returns: +// The new basic block that was created. 
+// +BasicBlock* AsyncTransformation::CreateSuspension(BasicBlock* block, + unsigned stateNum, + AsyncLiveness& life, + const ContinuationLayout& layout) +{ + if (m_lastSuspensionBB == nullptr) + { + m_lastSuspensionBB = m_comp->fgLastBBInMainFunction(); + } + + BasicBlock* suspendBB = m_comp->fgNewBBafter(BBJ_RETURN, m_lastSuspensionBB, false); + suspendBB->clearTryIndex(); + suspendBB->clearHndIndex(); + suspendBB->inheritWeightPercentage(block, 0); + m_lastSuspensionBB = suspendBB; + + if (m_sharedReturnBB != nullptr) + { + suspendBB->SetKindAndTargetEdge(BBJ_ALWAYS, m_comp->fgAddRefPred(m_sharedReturnBB, suspendBB)); + } + + JITDUMP(" Creating suspension " FMT_BB " for state %u\n", suspendBB->bbNum, stateNum); + + // Allocate continuation + GenTree* returnedContinuation = m_comp->gtNewLclvNode(m_returnedContinuationVar, TYP_REF); + + GenTreeCall* allocContinuation = + CreateAllocContinuationCall(life, returnedContinuation, layout.GCRefsCount, layout.DataSize); + + m_comp->compCurBB = suspendBB; + m_comp->fgMorphTree(allocContinuation); + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, allocContinuation)); + + GenTree* storeNewContinuation = m_comp->gtNewStoreLclVarNode(m_newContinuationVar, allocContinuation); + LIR::AsRange(suspendBB).InsertAtEnd(storeNewContinuation); + + // Fill in 'Resume' + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned resumeOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationResumeFldHnd); + GenTree* resumeStubAddr = CreateResumptionStubAddrTree(); + GenTree* storeResume = StoreAtOffset(newContinuation, resumeOffset, resumeStubAddr, TYP_I_IMPL); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResume)); + + // Fill in 'state' + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned stateOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd); + GenTree* stateNumNode = 
m_comp->gtNewIconNode((ssize_t)stateNum, TYP_INT); + GenTree* storeState = StoreAtOffset(newContinuation, stateOffset, stateNumNode, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeState)); + + // Fill in 'flags' + unsigned continuationFlags = 0; + if (layout.ReturnInGCData) + continuationFlags |= CORINFO_CONTINUATION_RESULT_IN_GCDATA; + if (block->hasTryIndex()) + continuationFlags |= CORINFO_CONTINUATION_NEEDS_EXCEPTION; + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + continuationFlags |= CORINFO_CONTINUATION_OSR_IL_OFFSET_IN_DATA; + + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned flagsOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationFlagsFldHnd); + GenTree* flagsNode = m_comp->gtNewIconNode((ssize_t)continuationFlags, TYP_INT); + GenTree* storeFlags = StoreAtOffset(newContinuation, flagsOffset, flagsNode, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeFlags)); + + if (layout.GCRefsCount > 0) + { + FillInGCPointersOnSuspension(layout.Locals, suspendBB); + } + + if (layout.DataSize > 0) + { + FillInDataOnSuspension(layout.Locals, suspendBB); + } + + if (suspendBB->KindIs(BBJ_RETURN)) + { + newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + GenTree* ret = m_comp->gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, newContinuation); + LIR::AsRange(suspendBB).InsertAtEnd(newContinuation, ret); + } + + return suspendBB; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateAllocContinuationCall: +// Create a call to the JIT helper that allocates a continuation. 
+// +// Parameters: +// life - Liveness information about live locals +// prevContinuation - IR node that has the value of the previous continuation object +// gcRefsCount - Number of GC refs to allocate in the continuation object +// dataSize - Number of bytes to allocate in the continuation object +// +// Returns: +// IR node representing the allocation. +// +GenTreeCall* AsyncTransformation::CreateAllocContinuationCall(AsyncLiveness& life, + GenTree* prevContinuation, + unsigned gcRefsCount, + unsigned dataSize) +{ + GenTree* gcRefsCountNode = m_comp->gtNewIconNode((ssize_t)gcRefsCount, TYP_I_IMPL); + GenTree* dataSizeNode = m_comp->gtNewIconNode((ssize_t)dataSize, TYP_I_IMPL); + // If VM requests that we report the method handle, or if we have a shared generic context method handle + // that is live here, then we need to call a different helper to keep the loader alive. + GenTree* methodHandleArg = nullptr; + GenTree* classHandleArg = nullptr; + if (((m_comp->info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_METHODDESC) != 0) && + life.IsLive(m_comp->info.compTypeCtxtArg)) + { + methodHandleArg = m_comp->gtNewLclvNode(m_comp->info.compTypeCtxtArg, TYP_I_IMPL); + } + else if (((m_comp->info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_METHODTABLE) != 0) && + life.IsLive(m_comp->info.compTypeCtxtArg)) + { + classHandleArg = m_comp->gtNewLclvNode(m_comp->info.compTypeCtxtArg, TYP_I_IMPL); + } + else if (m_asyncInfo.continuationsNeedMethodHandle) + { + methodHandleArg = m_comp->gtNewIconEmbMethHndNode(m_comp->info.compMethodHnd); + } + + if (methodHandleArg != nullptr) + { + return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_METHOD, TYP_REF, prevContinuation, + gcRefsCountNode, dataSizeNode, methodHandleArg); + } + + if (classHandleArg != nullptr) + { + return m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION_CLASS, TYP_REF, prevContinuation, + gcRefsCountNode, dataSizeNode, classHandleArg); + } + + return 
m_comp->gtNewHelperCallNode(CORINFO_HELP_ALLOC_CONTINUATION, TYP_REF, prevContinuation, gcRefsCountNode, + dataSizeNode); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::FillInGCPointersOnSuspension: +// Create IR that fills the GC pointers of the continuation object. +// This also nulls out the GC pointers in the locals if the local has data +// parts that need to be stored. +// +// Parameters: +// liveLocals - Information about each live local. +// suspendBB - Basic block to add IR to. +// +void AsyncTransformation::FillInGCPointersOnSuspension(const jitstd::vector& liveLocals, + BasicBlock* suspendBB) +{ + unsigned objectArrLclNum = GetGCDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned gcDataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationGCDataFldHnd); + GenTree* gcDataInd = LoadFromOffset(newContinuation, gcDataOffset, TYP_REF); + GenTree* storeAllocedObjectArr = m_comp->gtNewStoreLclVarNode(objectArrLclNum, gcDataInd); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedObjectArr)); + + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.GCDataCount <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + if (dsc->TypeGet() == TYP_REF) + { + GenTree* value = m_comp->gtNewLclvNode(inf.LclNum, TYP_REF); + GenTree* objectArr = m_comp->gtNewLclvNode(objectArrLclNum, TYP_REF); + GenTree* store = + StoreAtOffset(objectArr, OFFSETOF__CORINFO_Array__data + (inf.GCDataIndex * TARGET_POINTER_SIZE), value, + TYP_REF); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + else + { + assert((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()); + ClassLayout* layout = dsc->GetLayout(); + unsigned numSlots = layout->GetSlotCount(); + unsigned gcRefIndex = 0; + for (unsigned i = 0; i < numSlots; i++) + { + var_types gcPtrType = layout->GetGCPtrType(i); + 
assert((gcPtrType == TYP_I_IMPL) || (gcPtrType == TYP_REF)); + if (gcPtrType != TYP_REF) + { + continue; + } + + GenTree* value; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + value = LoadFromOffset(baseAddr, i * TARGET_POINTER_SIZE, TYP_REF); + } + else + { + value = m_comp->gtNewLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE); + } + + GenTree* objectArr = m_comp->gtNewLclvNode(objectArrLclNum, TYP_REF); + unsigned offset = + OFFSETOF__CORINFO_Array__data + ((inf.GCDataIndex + gcRefIndex) * TARGET_POINTER_SIZE); + GenTree* store = StoreAtOffset(objectArr, offset, value, TYP_REF); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + gcRefIndex++; + + if (inf.DataSize > 0) + { + // Null out the GC field in preparation of storing the rest. + GenTree* null = m_comp->gtNewNull(); + + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = StoreAtOffset(baseAddr, i * TARGET_POINTER_SIZE, null, TYP_REF); + } + else + { + store = m_comp->gtNewStoreLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE, null); + } + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + } + + if (!dsc->IsImplicitByRef()) + { + m_comp->lvaSetVarDoNotEnregister(inf.LclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + } + } + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::FillInDataOnSuspension: +// Create IR that fills the data array of the continuation object. +// +// Parameters: +// liveLocals - Information about each live local. +// suspendBB - Basic block to add IR to. 
+// +void AsyncTransformation::FillInDataOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB) +{ + unsigned byteArrLclNum = GetDataArrayVar(); + + GenTree* newContinuation = m_comp->gtNewLclvNode(m_newContinuationVar, TYP_REF); + unsigned dataOffset = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd); + GenTree* dataInd = LoadFromOffset(newContinuation, dataOffset, TYP_REF); + GenTree* storeAllocedByteArr = m_comp->gtNewStoreLclVarNode(byteArrLclNum, dataInd); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedByteArr)); + + if (m_comp->doesMethodHavePatchpoints() || m_comp->opts.IsOSR()) + { + GenTree* ilOffsetToStore; + if (m_comp->doesMethodHavePatchpoints()) + ilOffsetToStore = m_comp->gtNewIconNode(-1); + else + ilOffsetToStore = m_comp->gtNewIconNode((int)m_comp->info.compILEntry); + + GenTree* byteArr = m_comp->gtNewLclvNode(byteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data; + GenTree* storePatchpointOffset = StoreAtOffset(byteArr, offset, ilOffsetToStore, TYP_INT); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, storePatchpointOffset)); + } + + // Fill in data + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.DataSize <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + GenTree* byteArr = m_comp->gtNewLclvNode(byteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + inf.DataOffset; + + GenTree* value; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + value = m_comp->gtNewBlkIndir(dsc->GetLayout(), baseAddr, GTF_IND_NONFAULTING); + } + else + { + value = m_comp->gtNewLclVarNode(inf.LclNum); + } + + GenTree* store; + if ((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()) + { + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + GenTree* addr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, byteArr, cns); + // This is to heap, 
but all GC refs are nulled out already, so we can skip the write barrier. + // TODO-CQ: Backend does not care about GTF_IND_TGT_NOT_HEAP for STORE_BLK. + store = + m_comp->gtNewStoreBlkNode(dsc->GetLayout(), addr, value, GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP); + } + else + { + store = StoreAtOffset(byteArr, offset, value, dsc->TypeGet()); + } + + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateCheckAndSuspendAfterCall: +// Split the block containing the specified async call, and create the IR +// that checks whether suspension should be done after an async call. +// +// Parameters: +// block - The block containing the async call +// callDefInfo - Information about the async call's definition +// life - Liveness information about live locals +// suspendBB - Basic block to add IR to +// remainder - [out] The remainder block containing the IR that was after the async call. 
//
void AsyncTransformation::CreateCheckAndSuspendAfterCall(BasicBlock*               block,
                                                         const CallDefinitionInfo& callDefInfo,
                                                         AsyncLiveness&            life,
                                                         BasicBlock*               suspendBB,
                                                         BasicBlock**              remainder)
{
    // Capture the continuation returned by the call into m_returnedContinuationVar,
    // right after the point given by callDefInfo.InsertAfter.
    GenTree* continuationArg = new (m_comp, GT_ASYNC_CONTINUATION) GenTree(GT_ASYNC_CONTINUATION, TYP_REF);
    continuationArg->SetHasOrderingSideEffect();

    GenTree* storeContinuation = m_comp->gtNewStoreLclVarNode(m_returnedContinuationVar, continuationArg);
    LIR::AsRange(block).InsertAfter(callDefInfo.InsertAfter, continuationArg, storeContinuation);

    // JTRUE(returnedContinuation != null): a non-null continuation means we must suspend.
    GenTree* null                 = m_comp->gtNewNull();
    GenTree* returnedContinuation = m_comp->gtNewLclvNode(m_returnedContinuationVar, TYP_REF);
    GenTree* neNull               = m_comp->gtNewOperNode(GT_NE, TYP_INT, returnedContinuation, null);
    GenTree* jtrue                = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull);

    LIR::AsRange(block).InsertAfter(storeContinuation, null, returnedContinuation, neNull, jtrue);
    // Everything after the check moves into the remainder block.
    *remainder = m_comp->fgSplitBlockAfterNode(block, jtrue);
    JITDUMP(" Remainder is " FMT_BB "\n", (*remainder)->bbNum);

    // True edge -> suspendBB, false edge -> the remainder (the existing target after the split).
    FlowEdge* retBBEdge = m_comp->fgAddRefPred(suspendBB, block);
    block->SetCond(retBBEdge, block->GetTargetEdge());

    // Suspension is modelled as never taken for profile purposes.
    block->GetTrueEdge()->setLikelihood(0);
    block->GetFalseEdge()->setLikelihood(1);
}

//------------------------------------------------------------------------
// AsyncTransformation::CreateResumption:
//   Create the basic block that when branched to resumes execution on entry to
//   the function.
//
// Parameters:
//   block       - The block containing the async call
//   remainder   - The block that contains the IR after the (split) async call
//   call        - The async call
//   callDefInfo - Information about the async call's definition
//   stateNum    - State number assigned to this suspension point
//   layout      - Layout information for the continuation object
//
// Returns:
//   The new basic block that was created.
//
BasicBlock* AsyncTransformation::CreateResumption(BasicBlock*               block,
                                                  BasicBlock*               remainder,
                                                  GenTreeCall*              call,
                                                  const CallDefinitionInfo& callDefInfo,
                                                  unsigned                  stateNum,
                                                  const ContinuationLayout& layout)
{
    // Resumption blocks are chained after one another, starting after the
    // last block of the main function.
    if (m_lastResumptionBB == nullptr)
    {
        m_lastResumptionBB = m_comp->fgLastBBInMainFunction();
    }

    BasicBlock* resumeBB      = m_comp->fgNewBBafter(BBJ_ALWAYS, m_lastResumptionBB, true);
    FlowEdge*   remainderEdge = m_comp->fgAddRefPred(remainder, resumeBB);

    // It does not really make sense to inherit from the target, but given this
    // is always 0% this just propagates the profile weight flag + sets
    // BBF_RUN_RARELY.
    resumeBB->inheritWeightPercentage(remainder, 0);
    resumeBB->SetTargetEdge(remainderEdge);
    resumeBB->clearTryIndex();
    resumeBB->clearHndIndex();
    resumeBB->SetFlags(BBF_ASYNC_RESUMPTION);
    m_lastResumptionBB = resumeBB;

    JITDUMP(" Creating resumption " FMT_BB " for state %u\n", resumeBB->bbNum, stateNum);

    // We need to restore data before we restore GC pointers, since restoring
    // the data may also write the GC pointer fields with nulls.
    unsigned resumeByteArrLclNum = BAD_VAR_NUM;
    if (layout.DataSize > 0)
    {
        resumeByteArrLclNum = GetDataArrayVar();

        // Cache the incoming continuation's data array in a local.
        GenTree* newContinuation     = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF);
        unsigned dataOffset          = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd);
        GenTree* dataInd             = LoadFromOffset(newContinuation, dataOffset, TYP_REF);
        GenTree* storeAllocedByteArr = m_comp->gtNewStoreLclVarNode(resumeByteArrLclNum, dataInd);

        LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedByteArr));

        RestoreFromDataOnResumption(resumeByteArrLclNum, layout.Locals, resumeBB);
    }

    unsigned    resumeObjectArrLclNum = BAD_VAR_NUM;
    // Block that receives the return-value copy; replaced below if an exception check is inserted.
    BasicBlock* storeResultBB         = resumeBB;

    if (layout.GCRefsCount > 0)
    {
        resumeObjectArrLclNum = GetGCDataArrayVar();

        // Cache the incoming continuation's GC data array in a local.
        GenTree* newContinuation       = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF);
        unsigned gcDataOffset          = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationGCDataFldHnd);
        GenTree* gcDataInd             = LoadFromOffset(newContinuation, gcDataOffset, TYP_REF);
        GenTree* storeAllocedObjectArr = m_comp->gtNewStoreLclVarNode(resumeObjectArrLclNum, gcDataInd);
        LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeAllocedObjectArr));

        RestoreFromGCPointersOnResumption(resumeObjectArrLclNum, layout.Locals, resumeBB);

        // UINT_MAX means the layout has no exception slot.
        if (layout.ExceptionGCDataIndex != UINT_MAX)
        {
            storeResultBB = RethrowExceptionOnResumption(block, remainder, resumeObjectArrLclNum, layout, resumeBB);
        }
    }

    // Copy call return value.
    if (layout.ReturnSize > 0)
    {
        CopyReturnValueOnResumption(call, callDefInfo, resumeByteArrLclNum, resumeObjectArrLclNum, layout,
                                    storeResultBB);
    }

    return resumeBB;
}

//------------------------------------------------------------------------
// AsyncTransformation::RestoreFromDataOnResumption:
//   Create IR that restores locals from the data array of the continuation
//   object.
+// +// Parameters: +// resumeByteArrLclNum - Local that has the continuation object's data array +// liveLocals - Information about each live local. +// resumeBB - Basic block to append IR to +// +void AsyncTransformation::RestoreFromDataOnResumption(unsigned resumeByteArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB) +{ + // Copy data + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.DataSize <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + + GenTree* byteArr = m_comp->gtNewLclvNode(resumeByteArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + inf.DataOffset; + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + GenTree* addr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, byteArr, cns); + + GenTree* value; + if ((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()) + { + value = m_comp->gtNewBlkIndir(dsc->GetLayout(), addr, GTF_IND_NONFAULTING); + } + else + { + value = m_comp->gtNewIndir(dsc->TypeGet(), addr, GTF_IND_NONFAULTING); + } + + GenTree* store; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = m_comp->gtNewStoreBlkNode(dsc->GetLayout(), baseAddr, value, + GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP); + } + else + { + store = m_comp->gtNewStoreLclVarNode(inf.LclNum, value); + } + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::RestoreFromGCPointersOnResumption: +// Create IR that restores locals from the GC pointers array of the +// continuation object. +// +// Parameters: +// resumeObjectArrLclNum - Local that has the continuation object's GC pointers array +// liveLocals - Information about each live local. 
+// resumeBB - Basic block to append IR to +// +void AsyncTransformation::RestoreFromGCPointersOnResumption(unsigned resumeObjectArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB) +{ + for (const LiveLocalInfo& inf : liveLocals) + { + if (inf.GCDataCount <= 0) + { + continue; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(inf.LclNum); + if (dsc->TypeGet() == TYP_REF) + { + GenTree* objectArr = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + unsigned offset = OFFSETOF__CORINFO_Array__data + (inf.GCDataIndex * TARGET_POINTER_SIZE); + GenTree* value = LoadFromOffset(objectArr, offset, TYP_REF); + GenTree* store = m_comp->gtNewStoreLclVarNode(inf.LclNum, value); + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + } + else + { + assert((dsc->TypeGet() == TYP_STRUCT) || dsc->IsImplicitByRef()); + ClassLayout* layout = dsc->GetLayout(); + unsigned numSlots = layout->GetSlotCount(); + unsigned gcRefIndex = 0; + for (unsigned i = 0; i < numSlots; i++) + { + var_types gcPtrType = layout->GetGCPtrType(i); + assert((gcPtrType == TYP_I_IMPL) || (gcPtrType == TYP_REF)); + if (gcPtrType != TYP_REF) + { + continue; + } + + GenTree* objectArr = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + unsigned offset = + OFFSETOF__CORINFO_Array__data + ((inf.GCDataIndex + gcRefIndex) * TARGET_POINTER_SIZE); + GenTree* value = LoadFromOffset(objectArr, offset, TYP_REF); + GenTree* store; + if (dsc->IsImplicitByRef()) + { + GenTree* baseAddr = m_comp->gtNewLclvNode(inf.LclNum, dsc->TypeGet()); + store = StoreAtOffset(baseAddr, i * TARGET_POINTER_SIZE, value, TYP_REF); + // Implicit byref args are never on heap + store->gtFlags |= GTF_IND_TGT_NOT_HEAP; + } + else + { + store = m_comp->gtNewStoreLclFldNode(inf.LclNum, TYP_REF, i * TARGET_POINTER_SIZE, value); + } + + LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + gcRefIndex++; + } + } + } +} + +//------------------------------------------------------------------------ 
// AsyncTransformation::RethrowExceptionOnResumption:
//   Create IR that checks for an exception and rethrows it at the original
//   suspension point if necessary.
//
// Parameters:
//   block                 - The block containing the async call
//   remainder             - The block that contains the IR after the (split) async call
//   resumeObjectArrLclNum - Local that has the continuation object's GC pointers array
//   layout                - Layout information for the continuation object
//   resumeBB              - Basic block to append IR to
//
// Returns:
//   The new non-exception successor basic block for resumption. This is the
//   basic block where execution will continue if there was no exception to
//   rethrow.
//
BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock*               block,
                                                              BasicBlock*               remainder,
                                                              unsigned                  resumeObjectArrLclNum,
                                                              const ContinuationLayout& layout,
                                                              BasicBlock*               resumeBB)
{
    JITDUMP(" We need to rethrow an exception\n");

    // The throwing block is created in the same region as the block containing
    // the async call.
    BasicBlock* rethrowExceptionBB =
        m_comp->fgNewBBinRegion(BBJ_THROW, block, /* runRarely */ true, /* insertAtEnd */ true);
    JITDUMP(" Created " FMT_BB " to rethrow exception on resumption\n", rethrowExceptionBB->bbNum);

    BasicBlock* storeResultBB = m_comp->fgNewBBafter(BBJ_ALWAYS, resumeBB, true);
    JITDUMP(" Created " FMT_BB " to store result when resuming with no exception\n", storeResultBB->bbNum);

    FlowEdge* rethrowEdge     = m_comp->fgAddRefPred(rethrowExceptionBB, resumeBB);
    FlowEdge* storeResultEdge = m_comp->fgAddRefPred(storeResultBB, resumeBB);

    // resumeBB currently jumps straight to its target; drop that edge before
    // turning the block into a conditional branch.
    assert(resumeBB->KindIs(BBJ_ALWAYS));
    m_comp->fgRemoveRefPred(resumeBB->GetTargetEdge());

    // True edge -> rethrow, false edge -> store result.
    resumeBB->SetCond(rethrowEdge, storeResultEdge);
    rethrowEdge->setLikelihood(0);
    storeResultEdge->setLikelihood(1);
    rethrowExceptionBB->inheritWeightPercentage(resumeBB, 0);
    storeResultBB->inheritWeightPercentage(resumeBB, 100);
    JITDUMP(" Resumption " FMT_BB " becomes BBJ_COND to check for non-null exception\n", resumeBB->bbNum);

    FlowEdge* remainderEdge = m_comp->fgAddRefPred(remainder, storeResultBB);
    storeResultBB->SetTargetEdge(remainderEdge);

    // Later resumption blocks are placed after the new store-result block.
    m_lastResumptionBB = storeResultBB;

    // Check if we have an exception.
    unsigned exceptionLclNum = GetExceptionVar();
    GenTree* objectArr       = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF);
    unsigned exceptionOffset = OFFSETOF__CORINFO_Array__data + layout.ExceptionGCDataIndex * TARGET_POINTER_SIZE;
    GenTree* exceptionInd    = LoadFromOffset(objectArr, exceptionOffset, TYP_REF);
    GenTree* storeException  = m_comp->gtNewStoreLclVarNode(exceptionLclNum, exceptionInd);
    LIR::AsRange(resumeBB).InsertAtEnd(LIR::SeqTree(m_comp, storeException));

    // JTRUE(exception != null) terminates resumeBB.
    GenTree* exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF);
    GenTree* null      = m_comp->gtNewNull();
    GenTree* neNull    = m_comp->gtNewOperNode(GT_NE, TYP_INT, exception, null);
    GenTree* jtrue     = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull);
    LIR::AsRange(resumeBB).InsertAtEnd(exception, null, neNull, jtrue);

    // A fresh use of the exception local is needed as the helper call argument.
    exception = m_comp->gtNewLclVarNode(exceptionLclNum, TYP_REF);

    GenTreeCall* rethrowException = m_comp->gtNewHelperCallNode(CORINFO_HELP_THROWEXACT, TYP_VOID, exception);

    // Morph the helper call with rethrowExceptionBB as the current block before
    // sequencing it into that block.
    m_comp->compCurBB = rethrowExceptionBB;
    m_comp->fgMorphTree(rethrowException);

    LIR::AsRange(rethrowExceptionBB).InsertAtEnd(LIR::SeqTree(m_comp, rethrowException));

    storeResultBB->SetFlags(BBF_ASYNC_RESUMPTION);
    JITDUMP(" Added " FMT_BB " to rethrow exception at suspension point\n", rethrowExceptionBB->bbNum);

    return storeResultBB;
}

//------------------------------------------------------------------------
// AsyncTransformation::CopyReturnValueOnResumption:
//   Create IR that copies the return value from the continuation object to the
//   right local.
+// +// Parameters: +// call - The async call +// callDefInfo - Information about the async call's definition +// block - The block containing the async call +// resumeByteArrLclNum - Local that has the continuation object's data array +// resumeObjectArrLclNum - Local that has the continuation object's GC pointers array +// layout - Layout information for the continuation object +// storeResultBB - Basic block to append IR to +// +void AsyncTransformation::CopyReturnValueOnResumption(GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned resumeByteArrLclNum, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* storeResultBB) +{ + GenTree* resultBase; + unsigned resultOffset; + GenTreeFlags resultIndirFlags = GTF_IND_NONFAULTING; + if (layout.ReturnInGCData) + { + assert(resumeObjectArrLclNum != BAD_VAR_NUM); + resultBase = m_comp->gtNewLclvNode(resumeObjectArrLclNum, TYP_REF); + + if (call->gtReturnType == TYP_STRUCT) + { + // Boxed struct. + resultBase = LoadFromOffset(resultBase, OFFSETOF__CORINFO_Array__data, TYP_REF); + resultOffset = TARGET_POINTER_SIZE; // Offset of data inside box + } + else + { + assert(call->gtReturnType == TYP_REF); + resultOffset = OFFSETOF__CORINFO_Array__data; + } + } + else + { + assert(resumeByteArrLclNum != BAD_VAR_NUM); + resultBase = m_comp->gtNewLclvNode(resumeByteArrLclNum, TYP_REF); + resultOffset = OFFSETOF__CORINFO_Array__data + layout.ReturnValDataOffset; + if (layout.ReturnValDataOffset != 0) + resultIndirFlags = GTF_IND_UNALIGNED; + } + + assert(callDefInfo.DefinitionNode != nullptr); + LclVarDsc* resultLcl = m_comp->lvaGetDesc(callDefInfo.DefinitionNode); + assert((resultLcl->TypeGet() == TYP_STRUCT) == (call->gtReturnType == TYP_STRUCT)); + + // TODO-TP: We can use liveness to avoid generating a lot of this IR. 
+ if (call->gtReturnType == TYP_STRUCT) + { + if (m_comp->lvaGetPromotionType(resultLcl) != Compiler::PROMOTION_TYPE_INDEPENDENT) + { + GenTree* resultOffsetNode = m_comp->gtNewIconNode((ssize_t)resultOffset, TYP_I_IMPL); + GenTree* resultAddr = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, resultBase, resultOffsetNode); + GenTree* resultData = m_comp->gtNewBlkIndir(layout.ReturnStructLayout, resultAddr, resultIndirFlags); + GenTree* storeResult; + if ((callDefInfo.DefinitionNode->GetLclOffs() == 0) && + ClassLayout::AreCompatible(resultLcl->GetLayout(), layout.ReturnStructLayout)) + { + storeResult = m_comp->gtNewStoreLclVarNode(callDefInfo.DefinitionNode->GetLclNum(), resultData); + } + else + { + storeResult = m_comp->gtNewStoreLclFldNode(callDefInfo.DefinitionNode->GetLclNum(), TYP_STRUCT, + layout.ReturnStructLayout, + callDefInfo.DefinitionNode->GetLclOffs(), resultData); + } + + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResult)); + } + else + { + assert(!call->gtArgs.HasRetBuffer()); // Locals defined through retbufs are never independently promoted. 
+ + if ((resultLcl->lvFieldCnt > 1) && !resultBase->OperIsLocal()) + { + unsigned resultBaseVar = GetResultBaseVar(); + GenTree* storeResultBase = m_comp->gtNewStoreLclVarNode(resultBaseVar, resultBase); + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResultBase)); + + resultBase = m_comp->gtNewLclVarNode(resultBaseVar, TYP_REF); + } + + assert(callDefInfo.DefinitionNode->OperIs(GT_STORE_LCL_VAR)); + for (unsigned i = 0; i < resultLcl->lvFieldCnt; i++) + { + unsigned fieldLclNum = resultLcl->lvFieldLclStart + i; + LclVarDsc* fieldDsc = m_comp->lvaGetDesc(fieldLclNum); + + unsigned fldOffset = resultOffset + fieldDsc->lvFldOffset; + GenTree* value = LoadFromOffset(resultBase, fldOffset, fieldDsc->TypeGet(), resultIndirFlags); + GenTree* store = m_comp->gtNewStoreLclVarNode(fieldLclNum, value); + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, store)); + + if (i + 1 != resultLcl->lvFieldCnt) + { + resultBase = m_comp->gtCloneExpr(resultBase); + } + } + } + } + else + { + GenTree* value = LoadFromOffset(resultBase, resultOffset, call->gtReturnType, resultIndirFlags); + + GenTree* storeResult; + if (callDefInfo.DefinitionNode->OperIs(GT_STORE_LCL_VAR)) + { + storeResult = m_comp->gtNewStoreLclVarNode(callDefInfo.DefinitionNode->GetLclNum(), value); + } + else + { + storeResult = m_comp->gtNewStoreLclFldNode(callDefInfo.DefinitionNode->GetLclNum(), + callDefInfo.DefinitionNode->TypeGet(), + callDefInfo.DefinitionNode->GetLclOffs(), value); + } + + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_comp, storeResult)); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::LoadFromOffset: +// Create a load. +// +// Parameters: +// base - Base address of the load +// offset - Offset to add on top of the base address +// type - Type of the load to create +// indirFlags - Flags to add to the load +// +// Returns: +// IR node of the load. 
+// +GenTreeIndir* AsyncTransformation::LoadFromOffset(GenTree* base, + unsigned offset, + var_types type, + GenTreeFlags indirFlags) +{ + assert(base->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL)); + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + var_types addrType = base->TypeIs(TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF; + GenTree* addr = m_comp->gtNewOperNode(GT_ADD, addrType, base, cns); + GenTreeIndir* load = m_comp->gtNewIndir(type, addr, indirFlags); + return load; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::StoreAtOffset: +// Create a store. +// +// Parameters: +// base - Base address of the store +// offset - Offset to add on top of the base address +// value - Value to store +// storeType - Type of store +// +// Returns: +// IR node of the store. +// +GenTreeStoreInd* AsyncTransformation::StoreAtOffset(GenTree* base, unsigned offset, GenTree* value, var_types storeType) +{ + assert(base->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL)); + GenTree* cns = m_comp->gtNewIconNode((ssize_t)offset, TYP_I_IMPL); + var_types addrType = base->TypeIs(TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF; + GenTree* addr = m_comp->gtNewOperNode(GT_ADD, addrType, base, cns); + GenTreeStoreInd* store = m_comp->gtNewStoreIndNode(storeType, addr, value, GTF_IND_NONFAULTING); + return store; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetDataArrayVar: +// Create a new local to hold the data array of the continuation object. This +// local can be validly used for the entire suspension point; the returned +// local may be used by multiple suspension points. +// +// Returns: +// Local number. +// +unsigned AsyncTransformation::GetDataArrayVar() +{ + // Create separate locals unless we have many locals in the method for live + // range splitting purposes. This helps LSRA to avoid create additional + // callee saves that harm the prolog/epilog. 
+ if ((m_dataArrayVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_dataArrayVar = m_comp->lvaGrabTemp(false DEBUGARG("byte[] for continuation")); + m_comp->lvaGetDesc(m_dataArrayVar)->lvType = TYP_REF; + } + + return m_dataArrayVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetGCDataArrayVar: +// Create a new local to hold the GC pointers array of the continuation +// object. This local can be validly used for the entire suspension point; +// the returned local may be used by multiple suspension points. +// +// Returns: +// Local number. +// +unsigned AsyncTransformation::GetGCDataArrayVar() +{ + if ((m_gcDataArrayVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_gcDataArrayVar = m_comp->lvaGrabTemp(false DEBUGARG("object[] for continuation")); + m_comp->lvaGetDesc(m_gcDataArrayVar)->lvType = TYP_REF; + } + + return m_gcDataArrayVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetResultBaseVar: +// Create a new local to hold the base address of the incoming result from +// the continuation. This local can be validly used for the entire suspension +// point; the returned local may be used by multiple suspension points. +// +// Returns: +// Local number. +// +unsigned AsyncTransformation::GetResultBaseVar() +{ + if ((m_resultBaseVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_resultBaseVar = m_comp->lvaGrabTemp(false DEBUGARG("object for resuming result base")); + m_comp->lvaGetDesc(m_resultBaseVar)->lvType = TYP_REF; + } + + return m_resultBaseVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::GetExceptionVar: +// Create a new local to hold the exception in the continuation. This +// local can be validly used for the entire suspension point; the returned +// local may be used by multiple suspension points. +// +// Returns: +// Local number. 
+// +unsigned AsyncTransformation::GetExceptionVar() +{ + if ((m_exceptionVar == BAD_VAR_NUM) || !m_comp->lvaHaveManyLocals()) + { + m_exceptionVar = m_comp->lvaGrabTemp(false DEBUGARG("object for resuming exception")); + m_comp->lvaGetDesc(m_exceptionVar)->lvType = TYP_REF; + } + + return m_exceptionVar; +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateResumptionStubAddrTree: +// Create a tree that represents the address of the resumption stub entry +// point. +// +// Returns: +// IR node. +// +GenTree* AsyncTransformation::CreateResumptionStubAddrTree() +{ + switch (m_resumeStubLookup.accessType) + { + case IAT_VALUE: + { + return CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + } + case IAT_PVALUE: + { + GenTree* tree = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + tree = m_comp->gtNewIndir(TYP_I_IMPL, tree, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + return tree; + } + case IAT_PPVALUE: + { + noway_assert(!"Unexpected IAT_PPVALUE"); + return nullptr; + } + case IAT_RELPVALUE: + { + GenTree* addr = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + GenTree* tree = CreateFunctionTargetAddr(m_resumeStub, m_resumeStubLookup); + tree = m_comp->gtNewIndir(TYP_I_IMPL, tree, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + tree = m_comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, tree, addr); + return tree; + } + default: + { + noway_assert(!"Bad accessType"); + return nullptr; + } + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::CreateFunctionTargetAddr: +// Create a tree that represents the address of the resumption stub entry +// point. +// +// Returns: +// IR node. 
//
GenTree* AsyncTransformation::CreateFunctionTargetAddr(CORINFO_METHOD_HANDLE       methHnd,
                                                       const CORINFO_CONST_LOOKUP& lookup)
{
    // The constant's value is lookup.addr, flagged as a function address handle.
    GenTree* con = m_comp->gtNewIconHandleNode((size_t)lookup.addr, GTF_ICON_FTN_ADDR);
    // Debug builds also record which method the constant refers to.
    INDEBUG(con->AsIntCon()->gtTargetHandle = (size_t)methHnd);
    return con;
}

//------------------------------------------------------------------------
// AsyncTransformation::CreateResumptionSwitch:
//   Create the IR for the entry of the function that checks the continuation
//   and dispatches on its state number.
//
// Notes:
//   A new first block tests whether the continuation argument is non-null.
//   If so, control goes to the single resumption block, to a two-way branch
//   on the state, or to a switch on the state, depending on how many
//   resumption blocks there are. Methods with patchpoints and OSR methods
//   get an extra check of the IL offset stored at the start of the
//   continuation's data array.
//
void AsyncTransformation::CreateResumptionSwitch()
{
    m_comp->fgCreateNewInitBB();
    BasicBlock* newEntryBB = m_comp->fgFirstBB;

    // JTRUE(continuationArg != null): a non-null continuation means we are resuming.
    GenTree* continuationArg = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF);
    GenTree* null            = m_comp->gtNewNull();
    GenTree* neNull          = m_comp->gtNewOperNode(GT_NE, TYP_INT, continuationArg, null);
    GenTree* jtrue           = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, neNull);
    LIR::AsRange(newEntryBB).InsertAtEnd(continuationArg, null, neNull, jtrue);

    FlowEdge* resumingEdge;

    if (m_resumptionBBs.size() == 1)
    {
        JITDUMP(" Redirecting entry " FMT_BB " directly to " FMT_BB " as it is the only resumption block\n",
                newEntryBB->bbNum, m_resumptionBBs[0]->bbNum);
        resumingEdge = m_comp->fgAddRefPred(m_resumptionBBs[0], newEntryBB);
    }
    else if (m_resumptionBBs.size() == 2)
    {
        BasicBlock* condBB = m_comp->fgNewBBbefore(BBJ_COND, m_resumptionBBs[0], true);
        condBB->inheritWeightPercentage(newEntryBB, 0);

        // True edge (state != 0) -> resumption block 1, false edge -> resumption block 0.
        FlowEdge* to0 = m_comp->fgAddRefPred(m_resumptionBBs[0], condBB);
        FlowEdge* to1 = m_comp->fgAddRefPred(m_resumptionBBs[1], condBB);
        condBB->SetCond(to1, to0);
        to1->setLikelihood(0.5);
        to0->setLikelihood(0.5);

        resumingEdge = m_comp->fgAddRefPred(condBB, newEntryBB);

        JITDUMP(" Redirecting entry " FMT_BB " to BBJ_COND " FMT_BB " for resumption with 2 states\n",
                newEntryBB->bbNum, condBB->bbNum);

        // Load the state field of the continuation and compare it against zero.
        continuationArg          = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF);
        unsigned stateOffset     = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd);
        GenTree* stateOffsetNode = m_comp->gtNewIconNode((ssize_t)stateOffset, TYP_I_IMPL);
        GenTree* stateAddr       = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, continuationArg, stateOffsetNode);
        GenTree* stateInd        = m_comp->gtNewIndir(TYP_INT, stateAddr, GTF_IND_NONFAULTING);
        GenTree* zero            = m_comp->gtNewZeroConNode(TYP_INT);
        GenTree* stateNeZero     = m_comp->gtNewOperNode(GT_NE, TYP_INT, stateInd, zero);
        GenTree* jtrue           = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, stateNeZero);

        LIR::AsRange(condBB).InsertAtEnd(continuationArg, stateOffsetNode, stateAddr, stateInd, zero, stateNeZero,
                                         jtrue);
    }
    else
    {
        BasicBlock* switchBB = m_comp->fgNewBBbefore(BBJ_SWITCH, m_resumptionBBs[0], true);
        switchBB->inheritWeightPercentage(newEntryBB, 0);

        resumingEdge = m_comp->fgAddRefPred(switchBB, newEntryBB);

        JITDUMP(" Redirecting entry " FMT_BB " to BBJ_SWITCH " FMT_BB " for resumption with %zu states\n",
                newEntryBB->bbNum, switchBB->bbNum, m_resumptionBBs.size());

        // Switch on the state field of the continuation.
        continuationArg          = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF);
        unsigned stateOffset     = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationStateFldHnd);
        GenTree* stateOffsetNode = m_comp->gtNewIconNode((ssize_t)stateOffset, TYP_I_IMPL);
        GenTree* stateAddr       = m_comp->gtNewOperNode(GT_ADD, TYP_BYREF, continuationArg, stateOffsetNode);
        GenTree* stateInd        = m_comp->gtNewIndir(TYP_INT, stateAddr, GTF_IND_NONFAULTING);
        GenTree* switchNode      = m_comp->gtNewOperNode(GT_SWITCH, TYP_VOID, stateInd);

        LIR::AsRange(switchBB).InsertAtEnd(continuationArg, stateOffsetNode, stateAddr, stateInd, switchNode);

        m_comp->fgHasSwitch = true;

        // Default case. TODO-CQ: Support bbsHasDefault = false before lowering.
        // The first resumption block is appended again to serve as the default target.
        m_resumptionBBs.push_back(m_resumptionBBs[0]);
        BBswtDesc* swtDesc     = new (m_comp, CMK_BasicBlock) BBswtDesc;
        swtDesc->bbsCount      = (unsigned)m_resumptionBBs.size();
        swtDesc->bbsHasDefault = true;
        swtDesc->bbsDstTab     = new (m_comp, CMK_Async) FlowEdge*[m_resumptionBBs.size()];

        // Every table entry, including the default, gets the same likelihood.
        weight_t stateLikelihood = 1.0 / m_resumptionBBs.size();
        for (size_t i = 0; i < m_resumptionBBs.size(); i++)
        {
            swtDesc->bbsDstTab[i] = m_comp->fgAddRefPred(m_resumptionBBs[i], switchBB);
            swtDesc->bbsDstTab[i]->setLikelihood(stateLikelihood);
        }

        switchBB->SetSwitch(swtDesc);
    }

    // True edge -> resumption dispatch, false edge -> the original target of the new entry block.
    newEntryBB->SetCond(resumingEdge, newEntryBB->GetTargetEdge());
    resumingEdge->setLikelihood(0);
    newEntryBB->GetFalseEdge()->setLikelihood(1);

    if (m_comp->doesMethodHavePatchpoints())
    {
        JITDUMP(" Method has patch points...\n");
        // If we have patchpoints then first check if we need to resume in the OSR version.
        BasicBlock* callHelperBB = m_comp->fgNewBBafter(BBJ_THROW, m_comp->fgLastBBInMainFunction(), false);
        callHelperBB->bbSetRunRarely();
        callHelperBB->clearTryIndex();
        callHelperBB->clearHndIndex();

        JITDUMP(" Created " FMT_BB " for transitions back into OSR method\n", callHelperBB->bbNum);

        BasicBlock* onContinuationBB = newEntryBB->GetTrueTarget();
        BasicBlock* checkILOffsetBB  = m_comp->fgNewBBbefore(BBJ_COND, onContinuationBB, true);

        JITDUMP(" Created " FMT_BB " to check whether we should transition immediately to OSR\n",
                checkILOffsetBB->bbNum);

        // Redirect newEntryBB -> onContinuationBB into newEntryBB -> checkILOffsetBB -> onContinuationBB
        m_comp->fgRemoveRefPred(newEntryBB->GetTrueEdge());

        FlowEdge* toCheckILOffsetBB = m_comp->fgAddRefPred(checkILOffsetBB, newEntryBB);
        newEntryBB->SetTrueEdge(toCheckILOffsetBB);
        toCheckILOffsetBB->setLikelihood(0);
        checkILOffsetBB->inheritWeightPercentage(newEntryBB, 0);

        // True edge (IL offset >= 0) -> callHelperBB, false edge -> onContinuationBB.
        FlowEdge* toOnContinuationBB = m_comp->fgAddRefPred(onContinuationBB, checkILOffsetBB);
        FlowEdge* toCallHelperBB     = m_comp->fgAddRefPred(callHelperBB, checkILOffsetBB);
        checkILOffsetBB->SetCond(toCallHelperBB, toOnContinuationBB);
        toCallHelperBB->setLikelihood(0);
        toOnContinuationBB->setLikelihood(1);
        callHelperBB->inheritWeightPercentage(checkILOffsetBB, 0);

        // We need to dispatch to the OSR version if the IL offset is non-negative.
        continuationArg           = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF);
        unsigned offsetOfData     = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd);
        GenTree* dataArr          = LoadFromOffset(continuationArg, offsetOfData, TYP_REF);
        unsigned offsetOfIlOffset = OFFSETOF__CORINFO_Array__data;
        GenTree* ilOffset         = LoadFromOffset(dataArr, offsetOfIlOffset, TYP_INT);
        // Keep the IL offset in a temp: it is used by the check and by the helper call.
        unsigned ilOffsetLclNum   = m_comp->lvaGrabTemp(false DEBUGARG("IL offset for tier0 OSR method"));
        m_comp->lvaGetDesc(ilOffsetLclNum)->lvType = TYP_INT;
        GenTree* storeIlOffset                     = m_comp->gtNewStoreLclVarNode(ilOffsetLclNum, ilOffset);
        LIR::AsRange(checkILOffsetBB).InsertAtEnd(LIR::SeqTree(m_comp, storeIlOffset));

        ilOffset        = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT);
        GenTree* zero   = m_comp->gtNewIconNode(0);
        GenTree* geZero = m_comp->gtNewOperNode(GT_GE, TYP_INT, ilOffset, zero);
        GenTree* jtrue  = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, geZero);
        LIR::AsRange(checkILOffsetBB).InsertAtEnd(ilOffset, zero, geZero, jtrue);

        ilOffset = m_comp->gtNewLclvNode(ilOffsetLclNum, TYP_INT);

        // The helper is marked as not returning, matching the BBJ_THROW kind of callHelperBB.
        GenTreeCall* callHelper = m_comp->gtNewHelperCallNode(CORINFO_HELP_PATCHPOINT_FORCED, TYP_VOID, ilOffset);
        callHelper->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN;

        m_comp->compCurBB = callHelperBB;
        m_comp->fgMorphTree(callHelper);

        LIR::AsRange(callHelperBB).InsertAtEnd(LIR::SeqTree(m_comp, callHelper));
    }
    else if (m_comp->opts.IsOSR())
    {
        JITDUMP(" Method is an OSR function\n");
        // If the tier-0 version resumed and then transitioned to the OSR
        // version by normal means then we will see a non-null continuation
        // here that belongs to the tier0 method. In that case we should just
        // ignore it, so create a BB that jumps back.
        BasicBlock* onContinuationBB   = newEntryBB->GetTrueTarget();
        BasicBlock* onNoContinuationBB = newEntryBB->GetFalseTarget();
        BasicBlock* checkILOffsetBB    = m_comp->fgNewBBbefore(BBJ_COND, onContinuationBB, true);

        // Switch newEntryBB -> onContinuationBB into newEntryBB -> checkILOffsetBB
        m_comp->fgRemoveRefPred(newEntryBB->GetTrueEdge());
        FlowEdge* toCheckILOffset = m_comp->fgAddRefPred(checkILOffsetBB, newEntryBB);
        newEntryBB->SetTrueEdge(toCheckILOffset);
        toCheckILOffset->setLikelihood(0);
        checkILOffsetBB->inheritWeightPercentage(newEntryBB, 0);

        // Make checkILOffsetBB ->(true)  onNoContinuationBB
        //                      ->(false) onContinuationBB

        FlowEdge* toOnContinuationBB   = m_comp->fgAddRefPred(onContinuationBB, checkILOffsetBB);
        FlowEdge* toOnNoContinuationBB = m_comp->fgAddRefPred(onNoContinuationBB, checkILOffsetBB);
        checkILOffsetBB->SetCond(toOnNoContinuationBB, toOnContinuationBB);
        toOnContinuationBB->setLikelihood(0);
        toOnNoContinuationBB->setLikelihood(1);

        JITDUMP(" Created " FMT_BB " to check for Tier-0 continuations\n", checkILOffsetBB->bbNum);

        // A negative stored IL offset takes the true edge, i.e. the continuation is ignored.
        continuationArg           = m_comp->gtNewLclvNode(m_comp->lvaAsyncContinuationArg, TYP_REF);
        unsigned offsetOfData     = m_comp->info.compCompHnd->getFieldOffset(m_asyncInfo.continuationDataFldHnd);
        GenTree* dataArr          = LoadFromOffset(continuationArg, offsetOfData, TYP_REF);
        unsigned offsetOfIlOffset = OFFSETOF__CORINFO_Array__data;
        GenTree* ilOffset         = LoadFromOffset(dataArr, offsetOfIlOffset, TYP_INT);
        GenTree* zero             = m_comp->gtNewIconNode(0);
        GenTree* ltZero           = m_comp->gtNewOperNode(GT_LT, TYP_INT, ilOffset, zero);
        GenTree* jtrue            = m_comp->gtNewOperNode(GT_JTRUE, TYP_VOID, ltZero);
        LIR::AsRange(checkILOffsetBB).InsertAtEnd(LIR::SeqTree(m_comp, jtrue));
    }
}
diff --git a/src/coreclr/jit/async.h b/src/coreclr/jit/async.h
new file mode 100644
index
00000000000000..63e1db0a636ed0 --- /dev/null +++ b/src/coreclr/jit/async.h @@ -0,0 +1,152 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +struct LiveLocalInfo +{ + unsigned LclNum; + unsigned Alignment; + unsigned DataOffset; + unsigned DataSize; + unsigned GCDataIndex; + unsigned GCDataCount; + + explicit LiveLocalInfo(unsigned lclNum) + : LclNum(lclNum) + { + } +}; + +struct ContinuationLayout +{ + unsigned DataSize = 0; + unsigned GCRefsCount = 0; + ClassLayout* ReturnStructLayout = nullptr; + unsigned ReturnSize = 0; + bool ReturnInGCData = false; + unsigned ReturnValDataOffset = UINT_MAX; + unsigned ExceptionGCDataIndex = UINT_MAX; + const jitstd::vector& Locals; + + explicit ContinuationLayout(const jitstd::vector& locals) + : Locals(locals) + { + } +}; + +struct CallDefinitionInfo +{ + GenTreeLclVarCommon* DefinitionNode = nullptr; + + // Where to insert new IR for suspension checks. + GenTree* InsertAfter = nullptr; +}; + +class AsyncTransformation +{ + friend class AsyncLiveness; + + Compiler* m_comp; + jitstd::vector m_liveLocalsScratch; + CORINFO_ASYNC_INFO m_asyncInfo; + jitstd::vector m_resumptionBBs; + CORINFO_METHOD_HANDLE m_resumeStub = NO_METHOD_HANDLE; + CORINFO_CONST_LOOKUP m_resumeStubLookup; + unsigned m_returnedContinuationVar = BAD_VAR_NUM; + unsigned m_newContinuationVar = BAD_VAR_NUM; + unsigned m_dataArrayVar = BAD_VAR_NUM; + unsigned m_gcDataArrayVar = BAD_VAR_NUM; + unsigned m_resultBaseVar = BAD_VAR_NUM; + unsigned m_exceptionVar = BAD_VAR_NUM; + BasicBlock* m_lastSuspensionBB = nullptr; + BasicBlock* m_lastResumptionBB = nullptr; + BasicBlock* m_sharedReturnBB = nullptr; + + bool IsLive(unsigned lclNum); + void Transform(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& defs, + class AsyncLiveness& life, + BasicBlock** remainder); + + void CreateLiveSetForSuspension(BasicBlock* block, + GenTreeCall* call, + const jitstd::vector& 
defs, + AsyncLiveness& life, + jitstd::vector& liveLocals); + + void LiftLIREdges(BasicBlock* block, + const jitstd::vector& defs, + jitstd::vector& liveLocals); + + ContinuationLayout LayOutContinuation(BasicBlock* block, + GenTreeCall* call, + jitstd::vector& liveLocals); + + CallDefinitionInfo CanonicalizeCallDefinition(BasicBlock* block, GenTreeCall* call, AsyncLiveness& life); + + BasicBlock* CreateSuspension(BasicBlock* block, + unsigned stateNum, + AsyncLiveness& life, + const ContinuationLayout& layout); + GenTreeCall* CreateAllocContinuationCall(AsyncLiveness& life, + GenTree* prevContinuation, + unsigned gcRefsCount, + unsigned int dataSize); + void FillInGCPointersOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB); + void FillInDataOnSuspension(const jitstd::vector& liveLocals, BasicBlock* suspendBB); + void CreateCheckAndSuspendAfterCall(BasicBlock* block, + const CallDefinitionInfo& callDefInfo, + AsyncLiveness& life, + BasicBlock* suspendBB, + BasicBlock** remainder); + + BasicBlock* CreateResumption(BasicBlock* block, + BasicBlock* remainder, + GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned stateNum, + const ContinuationLayout& layout); + void RestoreFromDataOnResumption(unsigned resumeByteArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB); + void RestoreFromGCPointersOnResumption(unsigned resumeObjectArrLclNum, + const jitstd::vector& liveLocals, + BasicBlock* resumeBB); + BasicBlock* RethrowExceptionOnResumption(BasicBlock* block, + BasicBlock* remainder, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* resumeBB); + void CopyReturnValueOnResumption(GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + unsigned resumeByteArrLclNum, + unsigned resumeObjectArrLclNum, + const ContinuationLayout& layout, + BasicBlock* storeResultBB); + + GenTreeIndir* LoadFromOffset(GenTree* base, + unsigned offset, + var_types type, + GenTreeFlags indirFlags = 
GTF_IND_NONFAULTING); + GenTreeStoreInd* StoreAtOffset(GenTree* base, unsigned offset, GenTree* value, var_types storeType); + + unsigned GetDataArrayVar(); + unsigned GetGCDataArrayVar(); + unsigned GetResultBaseVar(); + unsigned GetExceptionVar(); + + GenTree* CreateResumptionStubAddrTree(); + GenTree* CreateFunctionTargetAddr(CORINFO_METHOD_HANDLE methHnd, const CORINFO_CONST_LOOKUP& lookup); + + void CreateResumptionSwitch(); + +public: + AsyncTransformation(Compiler* comp) + : m_comp(comp) + , m_liveLocalsScratch(comp->getAllocator(CMK_Async)) + , m_resumptionBBs(comp->getAllocator(CMK_Async)) + { + } + + PhaseStatus Run(); +}; diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index d0a6833400faa1..cbbf8249c669dc 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -527,6 +527,7 @@ void BasicBlock::dspFlags() const {BBF_HAS_ALIGN, "has-align"}, {BBF_HAS_MDARRAYREF, "mdarr"}, {BBF_NEEDS_GCPOLL, "gcpoll"}, + {BBF_ASYNC_RESUMPTION, "resume"}, }; bool first = true; diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 35595928fc7a5c..c2e922ae47cce0 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -463,6 +463,7 @@ enum BasicBlockFlags : uint64_t BBF_HAS_VALUE_PROFILE = MAKE_BBFLAG(38), // Block has a node that needs a value probing BBF_HAS_NEWARR = MAKE_BBFLAG(39), // BB contains 'new' of an array type. BBF_MAY_HAVE_BOUNDS_CHECKS = MAKE_BBFLAG(40), // BB *likely* has a bounds check (after rangecheck phase). + BBF_ASYNC_RESUMPTION = MAKE_BBFLAG(41), // Block is a resumption block in an async method // The following are sets of flags. @@ -488,7 +489,7 @@ enum BasicBlockFlags : uint64_t // TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ? 
BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_PROF_WEIGHT | BBF_HAS_NEWARR | \ - BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL | BBF_MAY_HAVE_BOUNDS_CHECKS, + BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL | BBF_MAY_HAVE_BOUNDS_CHECKS | BBF_ASYNC_RESUMPTION, // Flags that must be propagated to a new block if code is copied from a block to a new block. These are flags that // limit processing of a block if the code in question doesn't exist. This is conservative; we might not @@ -1411,6 +1412,16 @@ struct BasicBlock : private LIR::Range m_firstNode = tree; } + GenTree* GetLastLIRNode() const + { + return m_lastNode; + } + + void SetLastLIRNode(GenTree* tree) + { + m_lastNode = tree; + } + EntryState* bbEntryState; // verifier tracked state of all entries in stack. 
#define NO_BASE_TMP UINT_MAX // base# to use when we have none diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 74946282005559..fb33bd7773849e 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1134,6 +1134,7 @@ class CodeGen final : public CodeGenInterface #ifdef SWIFT_SUPPORT void genCodeForSwiftErrorReg(GenTree* tree); #endif // SWIFT_SUPPORT + void genCodeForAsyncContinuation(GenTree* tree); void genCodeForNullCheck(GenTreeIndir* tree); void genCodeForCmpXchg(GenTreeCmpXchg* tree); void genCodeForReuseVal(GenTree* treeNode); @@ -1266,6 +1267,8 @@ class CodeGen final : public CodeGenInterface #endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 void genReturn(GenTree* treeNode); + void genReturnSuspend(GenTreeUnOp* treeNode); + void genMarkReturnGCInfo(); #ifdef SWIFT_SUPPORT void genSwiftErrorReturn(GenTree* treeNode); diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index b0e6c091bb65fc..239b89877e515f 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -2081,7 +2081,16 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC // We can't do this optimization with callee saved floating point registers because // the stack would be allocated in a wrong spot. if (maskCalleeSavedFloat != RBM_NONE) + { + return RBM_NONE; + } + + // We similarly skip it for async due to the extra async continuation + // return that may be overridden by the pop. + if (compiler->compIsAsync()) + { return RBM_NONE; + } // Allocate space for small frames by pushing extra registers. It generates smaller and faster code // that extra sub sp,XXX/add sp,XXX. 
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 3af6fe28fd3f64..887bc594d24b35 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -295,6 +295,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; #endif // SWIFT_SUPPORT + case GT_RETURN_SUSPEND: + genReturnSuspend(treeNode->AsUnOp()); + break; + case GT_LEA: // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. genLeaInstruction(treeNode->AsAddrMode()); @@ -510,6 +514,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genConsumeReg(treeNode); break; + case GT_ASYNC_CONTINUATION: + genCodeForAsyncContinuation(treeNode); + break; + case GT_PINVOKE_PROLOG: noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask(compiler->info.compCallConv)) == 0); @@ -3489,7 +3497,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(params.secondRetSize != EA_BYREF); #endif - params.isJump = call->IsFastTailCall(); + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. 
@@ -3537,6 +3546,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } #endif // DEBUG + GenTree* target = getCallTarget(call, ¶ms.methHnd); if (target != nullptr) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 459cf0772db574..33b858fabd8008 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1953,6 +1953,11 @@ void CodeGen::genGenerateMachineCode() printf("; OSR variant for entry point 0x%x\n", compiler->info.compILEntry); } + if (compiler->compIsAsync()) + { + printf("; async\n"); + } + if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT) { printf("; optimized code\n"); @@ -6880,23 +6885,14 @@ void CodeGen::genReturn(GenTree* treeNode) } } - if (treeNode->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) + if (treeNode->OperIs(GT_RETURN) && compiler->compIsAsync()) { - const ReturnTypeDesc& retTypeDesc = compiler->compRetTypeDesc; + instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_ASYNC_CONTINUATION_RET); + } - if (compiler->compMethodReturnsRetBufAddr()) - { - gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); - } - else - { - unsigned retRegCount = retTypeDesc.GetReturnRegCount(); - for (unsigned i = 0; i < retRegCount; ++i) - { - gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv), - retTypeDesc.GetReturnRegType(i)); - } - } + if (treeNode->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET)) + { + genMarkReturnGCInfo(); } #ifdef PROFILING_SUPPORTED @@ -6963,6 +6959,83 @@ void CodeGen::genSwiftErrorReturn(GenTree* treeNode) } #endif // SWIFT_SUPPORT +//------------------------------------------------------------------------ +// genReturnSuspend: +// Generate code for a GT_RETURN_SUSPEND node +// +// Arguments: +// treeNode - The node +// +void CodeGen::genReturnSuspend(GenTreeUnOp* treeNode) +{ + GenTree* op = treeNode->gtGetOp1(); + assert(op->TypeIs(TYP_REF)); + + regNumber reg = genConsumeReg(op); + inst_Mov(TYP_REF, REG_ASYNC_CONTINUATION_RET, reg, /* canSkip */ 
true); + + ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc; + unsigned numRetRegs = retTypeDesc.GetReturnRegCount(); + for (unsigned i = 0; i < numRetRegs; i++) + { + if (varTypeIsGC(retTypeDesc.GetReturnRegType(i))) + { + regNumber returnReg = retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv); + instGen_Set_Reg_To_Zero(EA_PTRSIZE, returnReg); + } + } + + genMarkReturnGCInfo(); +} + +//------------------------------------------------------------------------ +// genMarkReturnGCInfo: +// Mark GC and non-GC pointers of return registers going into the epilog.. +// +void CodeGen::genMarkReturnGCInfo() +{ + const ReturnTypeDesc& retTypeDesc = compiler->compRetTypeDesc; + + if (compiler->compMethodReturnsRetBufAddr()) + { + gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); + } + else + { + unsigned retRegCount = retTypeDesc.GetReturnRegCount(); + for (unsigned i = 0; i < retRegCount; ++i) + { + gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv), + retTypeDesc.GetReturnRegType(i)); + } + } + + if (compiler->compIsAsync()) + { + gcInfo.gcMarkRegPtrVal(REG_ASYNC_CONTINUATION_RET, TYP_REF); + } +} + +//------------------------------------------------------------------------ +// genCodeForAsyncContinuation: +// Generate code for a GT_ASYNC_CONTINUATION node. +// +// Arguments: +// tree - The node +// +void CodeGen::genCodeForAsyncContinuation(GenTree* tree) +{ + assert(tree->OperIs(GT_ASYNC_CONTINUATION)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + inst_Mov(targetType, targetReg, REG_ASYNC_CONTINUATION_RET, /* canSkip */ true); + genTransferRegGCState(targetReg, REG_ASYNC_CONTINUATION_RET); + + genProduceReg(tree); +} + //------------------------------------------------------------------------ // isStructReturn: Returns whether the 'treeNode' is returning a struct. 
// diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 4b18015182c1ac..d7d0ff2d7c3be9 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -503,8 +503,13 @@ void CodeGen::genCodeForBBlist() } } + if (compiler->compIsAsync()) + { + nonVarPtrRegs &= ~RBM_ASYNC_CONTINUATION_RET; + } + // For a tailcall arbitrary argument registers may be live into the - // prolog. Skip validating those. + // epilog. Skip validating those. if (block->HasFlag(BBF_HAS_JMP)) { nonVarPtrRegs &= ~fullIntArgRegMask(CorInfoCallConvExtension::Managed); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index ad11faa95241f7..04517137bd2fd1 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -5859,7 +5859,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } - params.isJump = call->IsFastTailCall(); + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 167d7a16def46b..58fba35c3a67cc 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6051,7 +6051,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } - params.isJump = call->IsFastTailCall(); + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. 
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 18c12ee3d8770a..c33e2bf2888c78 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2038,6 +2038,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; #endif // SWIFT_SUPPORT + case GT_RETURN_SUSPEND: + genReturnSuspend(treeNode->AsUnOp()); + break; + case GT_LEA: // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. genLeaInstruction(treeNode->AsAddrMode()); @@ -2222,6 +2226,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genConsumeReg(treeNode); break; + case GT_ASYNC_CONTINUATION: + genCodeForAsyncContinuation(treeNode); + break; + #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: { @@ -6322,7 +6330,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA } } - params.isJump = call->IsFastTailCall(); + params.isJump = call->IsFastTailCall(); + params.hasAsyncRet = call->IsAsync(); // We need to propagate the IL offset information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. @@ -10553,10 +10562,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) if (frameSize > 0) { #ifdef TARGET_X86 - /* Add 'compiler->compLclFrameSize' to ESP */ - /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */ + // Add 'compiler->compLclFrameSize' to ESP. Use "pop ECX" for that, except in cases + // where ECX may contain some state. 
- if ((frameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed) + if ((frameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed && !compiler->compIsAsync()) { inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); regSet.verifyRegUsed(REG_ECX); @@ -10564,8 +10573,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) else #endif // TARGET_X86 { - /* Add 'compiler->compLclFrameSize' to ESP */ - /* Generate "add esp, <stack-size>" */ + // Add 'compiler->compLclFrameSize' to ESP + // Generate "add esp, <stack-size>" inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE); } } @@ -10643,7 +10652,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) // do nothing before popping the callee-saved registers } #ifdef TARGET_X86 - else if ((compiler->compLclFrameSize == REGSIZE_BYTES) && !compiler->compJmpOpUsed) + else if ((compiler->compLclFrameSize == REGSIZE_BYTES) && !compiler->compJmpOpUsed && + !compiler->compIsAsync()) { // "pop ecx" will make ESP point to the callee-saved registers inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1ea2e0a4c6be3d..52721a6ee0ff10 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -488,8 +488,9 @@ Compiler::Compiler(ArenaAllocator* arena, info.compILCodeSize = methodInfo->ILCodeSize; info.compILImportSize = 0; - info.compHasNextCallRetAddr = false; - info.compIsVarArgs = false; + info.compHasNextCallRetAddr = false; + info.compIsVarArgs = false; + info.compUsesAsyncContinuation = false; } //------------------------------------------------------------------------ @@ -3159,6 +3160,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags) { printf("OPTIONS: Jit invoked for AOT\n"); } + + if (compIsAsync()) + { + printf("OPTIONS: compilation is an async state machine\n"); + } } #endif @@ -5006,6 +5012,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl } #endif // TARGET_ARM + if (compIsAsync()) + { + DoPhase(this, PHASE_ASYNC, 
&Compiler::TransformAsync); + } + // Assign registers to variables, etc. // Create LinearScan before Lowering, so that Lowering can call LinearScan methods diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 534d1fd4cec3eb..a1fad2ce2b59d7 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -683,6 +683,15 @@ class LclVarDsc unsigned char lvAllDefsAreNoGc : 1; // For pinned locals: true if all defs of this local are no-gc unsigned char lvStackAllocatedObject : 1; // Local is a stack allocated object (class, box, array, ...) + bool IsImplicitByRef() + { +#if FEATURE_IMPLICIT_BYREFS + return lvIsImplicitByRef; +#else + return false; +#endif + } + // lvIsMultiRegArgOrRet() // returns true if this is a multireg LclVar struct used in an argument context // or if this is a multireg LclVar struct assigned from a multireg call @@ -3052,7 +3061,7 @@ class Compiler GenTreeCall* gtNewIndCallNode(GenTree* addr, var_types type, const DebugInfo& di = DebugInfo()); GenTreeCall* gtNewHelperCallNode( - unsigned helper, var_types type, GenTree* arg1 = nullptr, GenTree* arg2 = nullptr, GenTree* arg3 = nullptr); + unsigned helper, var_types type, GenTree* arg1 = nullptr, GenTree* arg2 = nullptr, GenTree* arg3 = nullptr, GenTree* arg4 = nullptr); GenTreeCall* gtNewRuntimeLookupHelperCallNode(CORINFO_RUNTIME_LOOKUP* pRuntimeLookup, GenTree* ctxTree, @@ -3975,6 +3984,9 @@ class Compiler unsigned lvaSwiftErrorLocal; #endif + // Variable representing async continuation argument passed. + unsigned lvaAsyncContinuationArg = BAD_VAR_NUM; + #if defined(DEBUG) && defined(TARGET_XARCH) unsigned lvaReturnSpCheck = BAD_VAR_NUM; // Stores SP to confirm it is not corrupted on return. 
@@ -4086,6 +4098,7 @@ class Compiler void lvaInitUserArgs(unsigned* curVarNum, unsigned skipArgs, unsigned takeArgs); void lvaInitGenericsCtxt(unsigned* curVarNum); void lvaInitVarArgsHandle(unsigned* curVarNum); + void lvaInitAsyncContinuation(unsigned* curVarNum); void lvaInitVarDsc(LclVarDsc* varDsc, unsigned varNum, @@ -4826,6 +4839,8 @@ class Compiler bool impMatchIsInstBooleanConversion(const BYTE* codeAddr, const BYTE* codeEndp, int* consumed); + bool impMatchAwaitPattern(const BYTE * codeAddr, const BYTE * codeEndp, int* configVal); + GenTree* impCastClassOrIsInstToTree( GenTree* op1, GenTree* op2, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool isCastClass, bool* booleanCheck, IL_OFFSET ilOffset); @@ -5503,6 +5518,8 @@ class Compiler PhaseStatus placeLoopAlignInstructions(); #endif + PhaseStatus TransformAsync(); + // This field keep the R2R helper call that would be inserted to trigger the constructor // of the static class. It is set as nongc or gc static base if they are imported, so // CSE can eliminate the repeated call, or the chepeast helper function that triggers it. 
@@ -7161,6 +7178,7 @@ class Compiler void optPrintCSEDataFlowSet(EXPSET_VALARG_TP cseDataFlowSet, bool includeBits = true); EXPSET_TP cseCallKillsMask; // Computed once - A mask that is used to kill available CSEs at callsites + EXPSET_TP cseAsyncKillsMask; // Computed once - A mask that is used to kill available BYREF CSEs at async suspension points static const size_t s_optCSEhashSizeInitial; static const size_t s_optCSEhashGrowthFactor; @@ -7249,6 +7267,7 @@ class Compiler unsigned optValnumCSE_Index(GenTree* tree, Statement* stmt); bool optValnumCSE_Locate(CSE_HeuristicCommon* heuristic); void optValnumCSE_InitDataFlow(); + void optValnumCSE_SetUpAsyncByrefKills(); void optValnumCSE_DataFlow(); void optValnumCSE_Availability(); void optValnumCSE_Heuristic(CSE_HeuristicCommon* heuristic); @@ -10714,12 +10733,13 @@ class Compiler // (2) the code is hot/cold split, and we issued less code than we expected // in the cold section (the hot section will always be padded out to compTotalHotCodeSize). - bool compIsStatic : 1; // Is the method static (no 'this' pointer)? - bool compIsVarArgs : 1; // Does the method have varargs parameters? - bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options? - bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback - bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic - bool compHasNextCallRetAddr : 1; // The NextCallReturnAddress intrinsic is used. + bool compIsStatic : 1; // Is the method static (no 'this' pointer)? + bool compIsVarArgs : 1; // Does the method have varargs parameters? + bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options? + bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback + bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic + bool compHasNextCallRetAddr : 1; // The NextCallReturnAddress intrinsic is used. 
+ bool compUsesAsyncContinuation : 1; // The AsyncCallContinuation intrinsic is used. var_types compRetType; // Return type of the method as declared in IL (including SIMD normalization) var_types compRetNativeType; // Normalized return type as per target arch ABI @@ -10852,6 +10872,11 @@ class Compiler #endif // TARGET_AMD64 } + bool compIsAsync() const + { + return opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ASYNC); + } + //------------------------------------------------------------------------ // compMethodReturnsMultiRegRetType: Does this method return a multi-reg value? // @@ -10876,6 +10901,13 @@ class Compiler bool compObjectStackAllocation() { + if (compIsAsync()) + { + // Object stack allocation takes the address of locals around + // suspension points. Disable entirely for now. + return false; + } + return (JitConfig.JitObjectStackAllocation() != 0); } @@ -11831,6 +11863,7 @@ class GenTreeVisitor // Leaf nodes case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -11907,6 +11940,7 @@ class GenTreeVisitor case GT_RETURNTRAP: case GT_FIELD_ADDR: case GT_RETURN: + case GT_RETURN_SUSPEND: case GT_RETFILT: case GT_RUNTIMELOOKUP: case GT_ARR_ADDR: diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index fd341fde235c0a..debab70fd74fda 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1552,7 +1552,7 @@ inline GenTree* Compiler::gtNewIconEmbFldHndNode(CORINFO_FIELD_HANDLE fldHnd) // New CT_HELPER node // inline GenTreeCall* Compiler::gtNewHelperCallNode( - unsigned helper, var_types type, GenTree* arg1, GenTree* arg2, GenTree* arg3) + unsigned helper, var_types type, GenTree* arg1, GenTree* arg2, GenTree* arg3, GenTree* arg4) { GenTreeCall* const result = gtNewCallNode(CT_HELPER, eeFindHelper(helper), type); @@ -1571,6 +1571,12 @@ inline GenTreeCall* Compiler::gtNewHelperCallNode( result->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER; #endif + 
if (arg4 != nullptr) + { + result->gtArgs.PushFront(this, NewCallArg::Primitive(arg4)); + result->gtFlags |= arg4->gtFlags & GTF_ALL_EFFECT; + } + if (arg3 != nullptr) { result->gtArgs.PushFront(this, NewCallArg::Primitive(arg3)); @@ -2921,6 +2927,12 @@ inline unsigned Compiler::compMapILargNum(unsigned ILargNum) assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted. } + if (ILargNum >= lvaAsyncContinuationArg) + { + ILargNum++; + assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted. + } + if (ILargNum >= lvaVarargsHandleArg) { ILargNum++; @@ -4372,6 +4384,7 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -4452,6 +4465,7 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_RETURNTRAP: case GT_KEEPALIVE: case GT_INC_SATURATE: + case GT_RETURN_SUSPEND: visitor(this->AsUnOp()->gtOp1); return; diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index 8b3f84a0cf3a45..eb2c0dffc0ee6e 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -66,6 +66,7 @@ CompMemKindMacro(ZeroInit) CompMemKindMacro(Pgo) CompMemKindMacro(MaskConversionOpt) CompMemKindMacro(TryRegionClone) +CompMemKindMacro(Async) CompMemKindMacro(RangeCheckCloning) //clang-format on diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 21915bf4a13a2f..2c816dc0ef9a43 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -119,6 +119,7 @@ CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", CompPhaseNameMacro(PHASE_REPAIR_PROFILE_POST_MORPH, "Repair profile post-morph", false, -1, false) CompPhaseNameMacro(PHASE_REPAIR_PROFILE_PRE_LAYOUT, "Repair profile pre-layout", false, -1, false) +CompPhaseNameMacro(PHASE_ASYNC, "Transform async", false, -1, true) CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", true, 
-1, false) CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", false, PHASE_LCLVARLIVENESS, false) CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK, "Per block local var liveness", false, PHASE_LCLVARLIVENESS, false) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index e4776520ef56e1..88d338c2d04438 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -3590,7 +3590,8 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSizeIn - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)) + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet) { emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE; @@ -3614,7 +3615,8 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, (argCnt > ID_MAX_SMALL_CNS) || // too many args (argCnt < 0) // caller pops arguments // There is a second ref/byref return register. - MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize))) + MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)) || + hasAsyncRet) { instrDescCGCA* id; @@ -3631,6 +3633,7 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt, #if MULTIREG_HAS_SECOND_GC_RET emitSetSecondRetRegGCType(id, secondRetSize); #endif // MULTIREG_HAS_SECOND_GC_RET + id->hasAsyncContinuationRet(hasAsyncRet); return id; } @@ -3674,7 +3677,8 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSizeIn - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)) + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet) { emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? 
retSizeIn : EA_PTRSIZE; @@ -3694,7 +3698,8 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, (argCnt > ID_MAX_SMALL_CNS) || // too many args (argCnt < 0) // caller pops arguments // There is a second ref/byref return register. - MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize))) + MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)) || + hasAsyncRet) { instrDescCGCA* id = emitAllocInstrCGCA(retSize); @@ -3711,6 +3716,7 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt, #if MULTIREG_HAS_SECOND_GC_RET emitSetSecondRetRegGCType(id, secondRetSize); #endif // MULTIREG_HAS_SECOND_GC_RET + id->hasAsyncContinuationRet(hasAsyncRet); return id; } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 80ac961a3a6d58..3afb0759cf54fc 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -473,6 +473,7 @@ struct EmitCallParams emitAttr retSize = EA_PTRSIZE; // For multi-reg args with GC returns in the second arg emitAttr secondRetSize = EA_UNKNOWN; + bool hasAsyncRet = false; BitVec ptrVars = BitVecOps::UninitVal(); regMaskTP gcrefRegs = RBM_NONE; regMaskTP byrefRegs = RBM_NONE; @@ -2285,8 +2286,19 @@ class emitter { _idcSecondRetRegGCType = gctype; } +#endif + + bool hasAsyncContinuationRet() const + { + return _hasAsyncContinuationRet; + } + void hasAsyncContinuationRet(bool value) + { + _hasAsyncContinuationRet = value; + } private: +#if MULTIREG_HAS_SECOND_GC_RET // This member stores the GC-ness of the second register in a 2 register returned struct on System V. // It is added to the call struct since it is not needed by the base instrDesc struct, which keeps GC-ness // of the first register for the instCall nodes. @@ -2296,6 +2308,7 @@ class emitter // The base struct's member keeping the GC-ness of the first return register is _idGCref. GCtype _idcSecondRetRegGCType : 2; // ... GC type for the second return register. 
#endif // MULTIREG_HAS_SECOND_GC_RET + bool _hasAsyncContinuationRet : 1; }; // TODO-Cleanup: Uses of stack-allocated instrDescs should be refactored to be unnecessary. diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index 68a02be3e19c77..c2656322b49701 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -4725,7 +4725,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) { /* Indirect call, virtual calls */ - id = emitNewInstrCallInd(argCnt, 0 /* disp */, params.ptrVars, gcrefRegs, byrefRegs, params.retSize); + id = emitNewInstrCallInd(argCnt, 0 /* disp */, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, + params.hasAsyncRet); } else { @@ -4734,7 +4735,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -6531,6 +6532,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) else if (id->idGCref() == GCT_BYREF) byrefRegs |= RBM_R0; + if (id->idIsLargeCall() && ((instrDescCGCA*)id)->hasAsyncContinuationRet()) + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + // If the GC register set has changed, report the new set. 
if (gcrefRegs != emitThisGCrefRegs) emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); diff --git a/src/coreclr/jit/emitarm.h b/src/coreclr/jit/emitarm.h index 829955ba25c8a1..6e3eb5793c54aa 100644 --- a/src/coreclr/jit/emitarm.h +++ b/src/coreclr/jit/emitarm.h @@ -65,10 +65,15 @@ void emitDispInsHelp(instrDesc* id, private: instrDesc* emitNewInstrCallDir( - int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize); - -instrDesc* emitNewInstrCallInd( - int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize); + int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, bool hasAsyncRet); + +instrDesc* emitNewInstrCallInd(int argCnt, + ssize_t disp, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize, + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 7bfadecc901076..2b2fe164da1a2c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9155,7 +9155,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) /* Indirect call, virtual calls */ id = emitNewInstrCallInd(argCnt, 0 /* disp */, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, - params.secondRetSize); + params.secondRetSize, params.hasAsyncRet); } else { @@ -9164,7 +9164,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -10894,6 +10895,10 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* 
dst, instrDesc* id, code_t { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 05a46ee000c200..c30ab5a57dec82 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -98,7 +98,8 @@ instrDesc* emitNewInstrCallDir(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, - emitAttr secondRetSize); + emitAttr secondRetSize, + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, @@ -106,7 +107,8 @@ instrDesc* emitNewInstrCallInd(int argCnt, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize, - emitAttr secondRetSize); + emitAttr secondRetSize, + bool hasAsyncRet); /************************************************************************/ /* enum to allow instruction optimisation to specify register order */ diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index e6c85ce08065fe..85c5589212dbf1 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2436,7 +2436,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_INDIR_R); id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, - params.secondRetSize); + params.secondRetSize, params.hasAsyncRet); } else { @@ -2445,7 +2445,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -2713,6 +2714,10 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, 
instrDesc* id, code_t { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 4cc730c51f1ae2..49f7bb702a7422 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -117,14 +117,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 1d769a8cd38e19..4c9bd307938855 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -1682,7 +1682,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_INDIR_R); id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, - params.secondRetSize); + params.secondRetSize, params.hasAsyncRet); } else { @@ -1691,7 +1691,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN); - id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize); + id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, params.retSize, params.secondRetSize, + 
params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -2012,6 +2013,10 @@ unsigned emitter::emitOutputCall(const insGroup* ig, BYTE* dst, instrDesc* id, c { byrefRegs |= RBM_INTRET_1; } + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } } // If the GC register set has changed, report the new set. diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index 249ae35951cc37..cbb2b11ec96747 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -42,14 +42,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); /************************************************************************/ /* Private helpers for instruction output */ diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ef2594be83d116..ea32168c74bf9d 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -11165,7 +11165,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) /* Indirect call, virtual calls */ id = emitNewInstrCallInd(argCnt, params.disp, params.ptrVars, gcrefRegs, byrefRegs, - params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize)); + params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize), + params.hasAsyncRet); } else { @@ -11175,7 +11176,8 @@ void emitter::emitIns_Call(const EmitCallParams& params) assert(params.callType == EC_FUNC_TOKEN || params.callType 
== EC_FUNC_TOKEN_INDIR); id = emitNewInstrCallDir(argCnt, params.ptrVars, gcrefRegs, byrefRegs, - params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize)); + params.retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(params.secondRetSize), + params.hasAsyncRet); } /* Update the emitter's live GC ref sets */ @@ -18577,11 +18579,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) byrefRegs |= RBM_EAX; } -#ifdef UNIX_AMD64_ABI // If is a multi-register return method is called, mark RDX appropriately (for System V AMD64). if (id->idIsLargeCall()) { instrDescCGCA* idCall = (instrDescCGCA*)id; +#ifdef UNIX_AMD64_ABI if (idCall->idSecondGCref() == GCT_GCREF) { gcrefRegs |= RBM_RDX; @@ -18590,8 +18592,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { byrefRegs |= RBM_RDX; } - } #endif // UNIX_AMD64_ABI + if (idCall->hasAsyncContinuationRet()) + { + gcrefRegs |= RBM_ASYNC_CONTINUATION_RET; + } + } // If the GC register set has changed, report the new set if (gcrefRegs != emitThisGCrefRegs) diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 90112a316b2c96..ee5f4b0e9d6ea8 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -756,14 +756,16 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + bool hasAsyncRet); void emitGetInsCns(const instrDesc* id, CnsVal* cv) const; ssize_t emitGetInsAmdCns(const instrDesc* id, CnsVal* cv) const; diff --git 
a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index b3bcdbed1b3139..293e55f4b2cda9 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -2610,7 +2610,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed // change. The original this (info.compThisArg) then remains // unmodified in the method. fgAddInternal is responsible for // adding the code to copy the initial this into the temp. - +// void Compiler::fgAdjustForAddressExposedOrWrittenThis() { LclVarDsc* thisVarDsc = lvaGetDesc(info.compThisArg); @@ -4904,7 +4904,9 @@ BasicBlock* Compiler::fgSplitBlockAtBeginning(BasicBlock* curr) if (curr->IsLIR()) { newBlock->SetFirstLIRNode(curr->GetFirstLIRNode()); + newBlock->SetLastLIRNode(curr->GetLastLIRNode()); curr->SetFirstLIRNode(nullptr); + curr->SetLastLIRNode(nullptr); } else { diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 5410d946e97840..3a00afe819eb99 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -2706,6 +2706,14 @@ bool BBPredsChecker::CheckEhTryDsc(BasicBlock* block, BasicBlock* blockPred, EHb return true; } + // Async resumptions are allowed to jump into try blocks at any point. They + // are introduced late enough that the invariant of single entry is no + // longer necessary. + if (blockPred->HasFlag(BBF_ASYNC_RESUMPTION)) + { + return true; + } + printf("Jump into the middle of try region: " FMT_BB " branches to " FMT_BB "\n", blockPred->bbNum, block->bbNum); assert(!"Jump into middle of try region"); return false; @@ -3102,7 +3110,8 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef // A branch or fall-through to a BBJ_CALLFINALLY block must come from the `try` region associated // with the finally block the BBJ_CALLFINALLY is targeting. 
There is one special case: if the // BBJ_CALLFINALLY is the first block of a `try`, then its predecessor can be outside the `try`: - // either a branch or fall-through to the first block. + // either a branch or fall-through to the first block. Similarly internal resumption blocks for + // async are allowed to do this as they are introduced late enough that we no longer need the invariant. // // Note that this IR condition is a choice. It naturally occurs when importing EH constructs. // This condition prevents flow optimizations from skipping blocks in a `try` and branching @@ -3140,7 +3149,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef } else { - assert(bbInTryRegions(finallyIndex, block)); + assert(bbInTryRegions(finallyIndex, block) || block->HasFlag(BBF_ASYNC_RESUMPTION)); } } } @@ -3357,6 +3366,7 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) break; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: expectedFlags |= GTF_ORDER_SIDEEFF; break; @@ -3629,6 +3639,10 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, Statement* stmt) // The root of the tree should have GTF_ORDER_SIDEEFF set noway_assert(stmt->GetRootNode()->gtFlags & GTF_ORDER_SIDEEFF); } + else if (tree->OperIs(GT_ASYNC_CONTINUATION)) + { + assert(tree->gtFlags & GTF_ORDER_SIDEEFF); + } } if (tree->OperIsUnary() && tree->AsOp()->gtOp1) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index e6be97f1d38e74..fdaff26c9b93d1 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1072,6 +1072,14 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, return; } + if (call->IsAsync() && info.compUsesAsyncContinuation) + { + // Currently not supported. Could provide a nice perf benefit for + // Task -> runtime async thunks if we supported it. 
+ result->NoteFatal(InlineObservation::CALLER_ASYNC_USED_CONTINUATION); + return; + } + // impMarkInlineCandidate() is expected not to mark tail prefixed calls // and recursive tail calls as inline candidates. noway_assert(!call->IsTailPrefixedCall()); @@ -2229,6 +2237,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) switch (arg.GetWellKnownArg()) { case WellKnownArg::RetBuffer: + case WellKnownArg::AsyncContinuation: continue; case WellKnownArg::InstParam: argInfo = inlineInfo->inlInstParamArgInfo; diff --git a/src/coreclr/jit/fgstmt.cpp b/src/coreclr/jit/fgstmt.cpp index 85809339965ff0..f5ab387e262416 100644 --- a/src/coreclr/jit/fgstmt.cpp +++ b/src/coreclr/jit/fgstmt.cpp @@ -539,6 +539,7 @@ inline bool OperIsControlFlow(genTreeOps oper) case GT_RETURN: case GT_RETFILT: case GT_SWIFT_ERROR_RET: + case GT_RETURN_SUSPEND: #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: #endif // FEATURE_EH_WINDOWS_X86 diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 2df3dc4da081ee..b8f7ce9774e786 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2449,7 +2449,18 @@ PhaseStatus Compiler::fgAddInternal() } else { - merger.SetMaxReturns(MergedReturns::ReturnCountHardLimit); + unsigned limit = MergedReturns::ReturnCountHardLimit; +#ifdef JIT32_GCENCODER + // For the jit32 GC encoder the limit is an actual hard limit. In + // async functions we will be introducing another return during + // the async transformation, so make sure there's a free epilog + // for it. + if (compIsAsync()) + { + limit--; + } +#endif + merger.SetMaxReturns(limit); } } diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index c54e53d27bde49..4f004ed800c935 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -502,7 +502,7 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) // Can't substitute GT_LCLHEAP. 
// // Don't substitute a no return call (trips up morph in some cases). - if (fwdSubNode->OperIs(GT_CATCH_ARG, GT_LCLHEAP)) + if (fwdSubNode->OperIs(GT_CATCH_ARG, GT_LCLHEAP, GT_ASYNC_CONTINUATION)) { JITDUMP(" tree to sub is catch arg, or lcl heap\n"); return false; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 67d22e9ac47164..8754c2b24df809 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -2247,6 +2247,36 @@ bool GenTreeCall::HasSideEffects(Compiler* compiler, bool ignoreExceptions, bool (!helperProperties.IsAllocator(helper) || ((gtCallMoreFlags & GTF_CALL_M_ALLOC_SIDE_EFFECTS) != 0)); } +//------------------------------------------------------------------------- +// IsAsync: Whether or not this call is to an async function. +// +// Return Value: +// True if so. +// +// Remarks: +// async involves passing an async continuation as a separate argument and +// returning an async continuation in REG_ASYNC_CONTINUATION_RET. +// +// The async continuation is usually JIT added +// (WellKnownArg::AsyncContinuation). This is the case for an async method +// calling another async method by normal means. However, the VM also creates +// stubs that call async methods through calli where the async continuations +// are passed explicitly. See CEEJitInfo::getAsyncResumptionStub and +// MethodDesc::EmitTaskReturningThunk for examples. In +// those cases the JIT does not know (and does not need to know) which arg is +// the async continuation. +// +// The VM also uses the StubHelpers.AsyncCallContinuation() intrinsic in the +// stubs discussed above. The JIT must take care in those cases to still mark +// the preceding call as an async call; this is required for correct LSRA +// behavior and GC reporting around the returned async continuation. This is +// currently done in lowering; see LowerAsyncContinuation(). 
+// +bool GenTreeCall::IsAsync() const +{ + return (gtCallMoreFlags & GTF_CALL_M_ASYNC) != 0; +} + //------------------------------------------------------------------------- // HasNonStandardAddedArgs: Return true if the method has non-standard args added to the call // argument list during argument morphing (fgMorphArgs), e.g., passed in R10 or R11 on AMD64. @@ -6563,6 +6593,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -6628,6 +6659,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_RETURNTRAP: case GT_RETURN: case GT_RETFILT: + case GT_RETURN_SUSPEND: case GT_BSWAP: case GT_BSWAP16: case GT_KEEPALIVE: @@ -6923,10 +6955,10 @@ bool GenTree::OperRequiresCallFlag(Compiler* comp) const switch (gtOper) { case GT_CALL: - return true; - case GT_GCPOLL: case GT_KEEPALIVE: + case GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: return true; case GT_SWIFT_ERROR: @@ -7256,6 +7288,8 @@ bool GenTree::OperRequiresGlobRefFlag(Compiler* comp) const case GT_CMPXCHG: case GT_MEMORYBARRIER: case GT_KEEPALIVE: + case GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: case GT_SWIFT_ERROR: case GT_GCPOLL: return true; @@ -7316,6 +7350,8 @@ bool GenTree::OperSupportsOrderingSideEffect() const case GT_CMPXCHG: case GT_MEMORYBARRIER: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: + case GT_RETURN_SUSPEND: case GT_SWIFT_ERROR: return true; default: @@ -9429,6 +9465,7 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree) goto DONE; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_NO_OP: case GT_NOP: case GT_LABEL: @@ -10170,6 +10207,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_LCL_FLD: case GT_LCL_ADDR: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_LABEL: case GT_FTN_ADDR: case GT_RET_EXPR: @@ -10241,6 +10279,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* 
node) case GT_PUTARG_SPLIT: #endif // FEATURE_ARG_SPLIT case GT_RETURNTRAP: + case GT_RETURN_SUSPEND: m_edge = &m_node->AsUnOp()->gtOp1; assert(*m_edge != nullptr); m_advance = &GenTreeUseEdgeIterator::Terminate; @@ -11776,6 +11815,10 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons { ilName = "this"; } + else if (lclNum == lvaAsyncContinuationArg) + { + ilName = "AsyncCont"; + } else { ilKind = "arg"; @@ -12336,6 +12379,7 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack) case GT_START_PREEMPTGC: case GT_PROF_HOOK: case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_MEMORYBARRIER: case GT_PINVOKE_PROLOG: case GT_JMPTABLE: @@ -13083,6 +13127,8 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg) return "va cookie"; case WellKnownArg::InstParam: return "gctx"; + case WellKnownArg::AsyncContinuation: + return "async"; case WellKnownArg::RetBuffer: return "retbuf"; case WellKnownArg::PInvokeFrame: diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index f3fb94b09429e8..e57d334a2805b5 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4238,6 +4238,7 @@ enum GenTreeCallFlags : unsigned int GTF_CALL_M_GUARDED_DEVIRT_CHAIN = 0x00080000, // this call is a candidate for chained guarded devirtualization GTF_CALL_M_ALLOC_SIDE_EFFECTS = 0x00100000, // this is a call to an allocator with side effects GTF_CALL_M_SUPPRESS_GC_TRANSITION = 0x00200000, // suppress the GC transition (i.e. during a pinvoke) but a separate GC safe point is required. 
+ GTF_CALL_M_ASYNC = 0x00400000, // this call is a runtime async method call and thus a suspension point GTF_CALL_M_EXPANDED_EARLY = 0x00800000, // the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower GTF_CALL_M_LDVIRTFTN_INTERFACE = 0x01000000, // ldvirtftn on an interface type GTF_CALL_M_CAST_CAN_BE_EXPANDED = 0x02000000, // this cast (helper call) can be expanded if it's profitable. To be removed. @@ -4563,6 +4564,7 @@ enum class WellKnownArg : unsigned ThisPointer, VarArgsCookie, InstParam, + AsyncContinuation, RetBuffer, PInvokeFrame, WrapperDelegateCell, @@ -5019,6 +5021,13 @@ struct GenTreeCall final : public GenTree #endif } + void SetIsAsync() + { + gtCallMoreFlags |= GTF_CALL_M_ASYNC; + } + + bool IsAsync() const; + //--------------------------------------------------------------------------- // GetRegNumByIdx: get i'th return register allocated to this call node. // diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 5a48e4b17aaa9a..4bf68fbb838e1d 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -33,6 +33,7 @@ GTNODE(LCL_ADDR , GenTreeLclFld ,0,0,GTK_LEAF) // local //----------------------------------------------------------------------------- GTNODE(CATCH_ARG , GenTree ,0,0,GTK_LEAF) // Exception object in a catch block +GTNODE(ASYNC_CONTINUATION, GenTree ,0,0,GTK_LEAF) // Access returned continuation by an async call GTNODE(LABEL , GenTree ,0,0,GTK_LEAF) // Jump-target GTNODE(JMP , GenTreeVal ,0,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function GTNODE(FTN_ADDR , GenTreeFptrVal ,0,0,GTK_LEAF) // Address of a function @@ -313,6 +314,11 @@ GTNODE(RETURN , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) GTNODE(SWITCH , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) GTNODE(NO_OP , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE) // A NOP that cannot be deleted. +// Suspend an async method, returning a continuation. 
+// Before lowering this is a seemingly normal TYP_VOID node with a lot of side effects (GTF_CALL | GTF_GLOB_REF | GTF_ORDER_SIDEEFF). +// Lowering then removes all successor nodes and leaves it as the terminator node. +GTNODE(RETURN_SUSPEND , GenTreeOp ,0,1,GTK_UNOP|GTK_NOVALUE) // Return a continuation in an async method + GTNODE(START_NONGC , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Starts a new instruction group that will be non-gc interruptible. GTNODE(START_PREEMPTGC , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Starts a new instruction group where preemptive GC is enabled. GTNODE(PROF_HOOK , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Profiler Enter/Leave/TailCall hook. diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 0f609731675632..4585033f8e2f70 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -846,7 +846,8 @@ GenTree* Compiler::impStoreStruct(GenTree* store, // Make sure we don't pass something other than a local address to the return buffer arg. // It is allowed to pass current's method return buffer as it is a local too. - if (fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(srcCall->gtRetClsHnd)) + if ((fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(srcCall->gtRetClsHnd)) || + (compIsAsync() && !destAddr->OperIs(GT_LCL_ADDR))) { unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer")); lvaSetStruct(tmp, srcCall->gtRetClsHnd, false); @@ -972,7 +973,8 @@ GenTree* Compiler::impStoreStruct(GenTree* store, // Make sure we don't pass something other than a local address to the return buffer arg. // It is allowed to pass current's method return buffer as it is a local too. 
- if (fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(call->gtRetClsHnd)) + if ((fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(call->gtRetClsHnd)) || + (compIsAsync() && !destAddr->OperIs(GT_LCL_ADDR))) { unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer")); lvaSetStruct(tmp, call->gtRetClsHnd, false); @@ -5970,6 +5972,85 @@ bool Compiler::impBlockIsInALoop(BasicBlock* block) block->HasFlag(BBF_BACKWARD_JUMP); } +//------------------------------------------------------------------------ +// impMatchAwaitPattern: check if a method call starts an Await pattern +// that can be optimized for runtime async +// +// Arguments: +// codeAddr - IL after call[virt] +// codeEndp - End of IL code stream +// configVal - [out] set to 0 or 1, accordingly, if we saw ConfigureAwait(0|1) +// +// Returns: +// true if this is an Await that we can optimize +// +bool Compiler::impMatchAwaitPattern(const BYTE* codeAddr, const BYTE* codeEndp, int* configVal) +{ + // If we see the following code pattern in runtime async methods: + // + // call[virt] <Method> + // [ OPTIONAL ] + // ldc.i4.0 / ldc.i4.1 + // call[virt] <ConfigureAwait> + // call <Await> + // + // We emit an equivalent of: + // + // call[virt] <RtMethod> + // + // where "RtMethod" is the runtime-async counterpart of a Task-returning method. + // + // NOTE: we could potentially check if Method is not a thunk and, in cases when we can tell, + // bypass this optimization. Otherwise in a non-thunk case we would be + // replacing the pattern with a call to a thunk, which contains roughly the same code. 
+ + const BYTE* nextOpcode = codeAddr + sizeof(mdToken); + // There must be enough space after ldc for {call + tk + call + tk} + if (nextOpcode + 2 * (1 + sizeof(mdToken)) < codeEndp) + { + uint8_t nextOp = getU1LittleEndian(nextOpcode); + uint8_t nextNextOp = getU1LittleEndian(nextOpcode + 1); + if ((nextOp != CEE_LDC_I4_0 && nextOp != CEE_LDC_I4_1) || + (nextNextOp != CEE_CALL && nextNextOp != CEE_CALLVIRT)) + { + goto checkForAwait; + } + + // check if the token after {ldc, call[virt]} is ConfigureAwait + CORINFO_RESOLVED_TOKEN nextCallTok; + impResolveToken(nextOpcode + 2, &nextCallTok, CORINFO_TOKENKIND_Method); + + if (!eeIsIntrinsic(nextCallTok.hMethod) || + lookupNamedIntrinsic(nextCallTok.hMethod) != NI_System_Threading_Tasks_Task_ConfigureAwait) + { + goto checkForAwait; + } + + *configVal = nextOp == CEE_LDC_I4_0 ? 0 : 1; + // skip {ldc; call; <ConfigureAwait>} + nextOpcode += 1 + 1 + sizeof(mdToken); + } + +checkForAwait: + + if ((nextOpcode + sizeof(mdToken) < codeEndp) && (getU1LittleEndian(nextOpcode) == CEE_CALL)) + { + // resolve the next token + CORINFO_RESOLVED_TOKEN nextCallTok; + impResolveToken(nextOpcode + 1, &nextCallTok, CORINFO_TOKENKIND_Method); + + // check if it is an Await intrinsic + if (eeIsIntrinsic(nextCallTok.hMethod) && + lookupNamedIntrinsic(nextCallTok.hMethod) == NI_System_Runtime_CompilerServices_RuntimeHelpers_Await) + { + // yes, this is an Await + return true; + } + } + + return false; +} + #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function @@ -8986,7 +9067,41 @@ void Compiler::impImportBlockCode(BasicBlock* block) // many other places. We unfortunately embed that knowledge here. if (opcode != CEE_CALLI) { - _impResolveToken(CORINFO_TOKENKIND_Method); + bool isAwait = false; + // TODO: The configVal should be wired to the actual implementation + // that controls the flow of sync context. + // We do not have that yet. 
+ int configVal = -1; // -1 not configured, 0/1 configured to false/true + if (compIsAsync() && JitConfig.JitOptimizeAwait()) + { + isAwait = impMatchAwaitPattern(codeAddr, codeEndp, &configVal); + } + + if (isAwait) + { + _impResolveToken(CORINFO_TOKENKIND_Await); + if (resolvedToken.hMethod != NULL) + { + // There is a runtime async variant that is implicitly awaitable, just call that. + // if configured, skip {ldc call ConfigureAwait} + if (configVal >= 0) + codeAddr += 2 + sizeof(mdToken); + + // Skip the call to `Await` + codeAddr += 1 + sizeof(mdToken); + } + else + { + // This can happen in rare cases when the Task-returning method is not a runtime Async + // function. For example "T M1<T>(T arg) => arg" when called with a Task argument. Treat + // that as a regular call that is Awaited + _impResolveToken(CORINFO_TOKENKIND_Method); + } + } + else + { + _impResolveToken(CORINFO_TOKENKIND_Method); + } eeGetCallInfo(&resolvedToken, (prefixFlags & PREFIX_CONSTRAINED) ? &constrainedResolvedToken : nullptr, @@ -13273,7 +13388,8 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) switch (arg.GetWellKnownArg()) { case WellKnownArg::RetBuffer: - // This does not appear in the table of inline arg info; do not include them + case WellKnownArg::AsyncContinuation: + // These do not appear in the table of inline arg info; do not include them continue; case WellKnownArg::InstParam: pInlineInfo->inlInstParamArgInfo = argInfo = new (this, CMK_Inlining) InlArgInfo{}; diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 63401e011f19d2..e457fc061f10fa 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -96,7 +96,8 @@ var_types Compiler::impImportCall(OPCODE opcode, bool bIntrinsicImported = false; CORINFO_SIG_INFO calliSig; - NewCallArg extraArg; + GenTree* varArgsCookie = nullptr; + GenTree* instParam = nullptr; // Swift calls that might throw use a SwiftError* arg that requires additional IR to 
handle, // so if we're importing a Swift call, look for this type in the signature @@ -711,12 +712,15 @@ var_types Compiler::impImportCall(OPCODE opcode, } } - /*------------------------------------------------------------------------- - * Create the argument list - */ + if (sig->isAsyncCall()) + { + call->AsCall()->SetIsAsync(); + } + + // Now create the argument list. //------------------------------------------------------------------------- - // Special case - for varargs we have an implicit last argument + // Special case - for varargs we have an extra argument if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG) { @@ -729,9 +733,7 @@ var_types Compiler::impImportCall(OPCODE opcode, varCookie = info.compCompHnd->getVarArgsHandle(sig, &pVarCookie); assert((!varCookie) != (!pVarCookie)); - GenTree* cookieNode = gtNewIconEmbHndNode(varCookie, pVarCookie, GTF_ICON_VARG_HDL, sig); - assert(extraArg.Node == nullptr); - extraArg = NewCallArg::Primitive(cookieNode).WellKnown(WellKnownArg::VarArgsCookie); + varArgsCookie = gtNewIconEmbHndNode(varCookie, pVarCookie, GTF_ICON_VARG_HDL, sig); } //------------------------------------------------------------------------- @@ -751,7 +753,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // We also set the exact type context associated with the call so we can // inline the call correctly later on. 
- if (sig->callConv & CORINFO_CALLCONV_PARAMTYPE) + if (sig->hasTypeArg()) { assert(call->AsCall()->gtCallType == CT_USER_FUNC); if (clsHnd == nullptr) @@ -761,8 +763,7 @@ var_types Compiler::impImportCall(OPCODE opcode, assert(opcode != CEE_CALLI); - GenTree* instParam; - bool runtimeLookup; + bool runtimeLookup; // Instantiated generic method if (((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_METHOD) @@ -852,9 +853,6 @@ var_types Compiler::impImportCall(OPCODE opcode, } } } - - assert(extraArg.Node == nullptr); - extraArg = NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam); } if ((opcode == CEE_NEWOBJ) && ((clsFlags & CORINFO_FLG_DELEGATE) != 0)) @@ -890,18 +888,50 @@ var_types Compiler::impImportCall(OPCODE opcode, } impPopCallArgs(sig, call->AsCall()); - if (extraArg.Node != nullptr) + + // Extra args + if ((instParam != nullptr) || call->AsCall()->IsAsync() || (varArgsCookie != nullptr)) { if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L) { - call->AsCall()->gtArgs.PushFront(this, extraArg); + if (varArgsCookie != nullptr) + { + call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(varArgsCookie) + .WellKnown(WellKnownArg::VarArgsCookie)); + } + + if (call->AsCall()->IsAsync()) + { + call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(gtNewNull(), TYP_REF) + .WellKnown(WellKnownArg::AsyncContinuation)); + } + + if (instParam != nullptr) + { + call->AsCall()->gtArgs.PushFront(this, + NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam)); + } } else { - call->AsCall()->gtArgs.PushBack(this, extraArg); - } + if (instParam != nullptr) + { + call->AsCall()->gtArgs.PushBack(this, + NewCallArg::Primitive(instParam).WellKnown(WellKnownArg::InstParam)); + } + + if (call->AsCall()->IsAsync()) + { + call->AsCall()->gtArgs.PushBack(this, NewCallArg::Primitive(gtNewNull(), TYP_REF) + .WellKnown(WellKnownArg::AsyncContinuation)); + } - call->gtFlags |= extraArg.Node->gtFlags & GTF_GLOB_EFFECT; 
+ if (varArgsCookie != nullptr) + { + call->AsCall()->gtArgs.PushBack(this, NewCallArg::Primitive(varArgsCookie) + .WellKnown(WellKnownArg::VarArgsCookie)); + } + } } //------------------------------------------------------------------------- @@ -3303,6 +3333,32 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, return new (this, GT_LABEL) GenTree(GT_LABEL, TYP_I_IMPL); } + if (ni == NI_System_StubHelpers_AsyncCallContinuation) + { + GenTree* node = new (this, GT_ASYNC_CONTINUATION) GenTree(GT_ASYNC_CONTINUATION, TYP_REF); + node->SetHasOrderingSideEffect(); + node->gtFlags |= GTF_CALL | GTF_GLOB_REF; + info.compUsesAsyncContinuation = true; + return node; + } + + if (ni == NI_System_Runtime_CompilerServices_RuntimeHelpers_AsyncSuspend) + { + GenTree* node = gtNewOperNode(GT_RETURN_SUSPEND, TYP_VOID, impPopStack().val); + node->SetHasOrderingSideEffect(); + node->gtFlags |= GTF_CALL | GTF_GLOB_REF; + return node; + } + + if (ni == NI_System_Runtime_CompilerServices_RuntimeHelpers_Await) + { + // These are marked intrinsics simply to match them by name in + // the Await pattern optimization. Make sure we keep pIntrinsicName assigned + // (it would be overridden if we left this up to the rest of this function). 
+ *pIntrinsicName = ni; + return nullptr; + } + bool betterToExpand = false; // Allow some lightweight intrinsics in Tier0 which can improve throughput @@ -11012,6 +11068,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_Runtime_CompilerServices_RuntimeHelpers_GetMethodTable; } + else if (strcmp(methodName, "Await") == 0) + { + result = NI_System_Runtime_CompilerServices_RuntimeHelpers_Await; + } + else if (strcmp(methodName, "AsyncSuspend") == 0) + { + result = NI_System_Runtime_CompilerServices_RuntimeHelpers_AsyncSuspend; + } } else if (strcmp(className, "StaticsHelpers") == 0) { @@ -11261,6 +11325,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_StubHelpers_NextCallReturnAddress; } + else if (strcmp(methodName, "AsyncCallContinuation") == 0) + { + result = NI_System_StubHelpers_AsyncCallContinuation; + } } } else if (strcmp(namespaceName, "Text") == 0) @@ -11338,6 +11406,17 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } } + else if (strcmp(namespaceName, "Threading.Tasks") == 0) + { + if (strcmp(methodName, "ConfigureAwait") == 0) + { + if (strcmp(className, "Task`1") == 0 || strcmp(className, "Task") == 0 || + strcmp(className, "ValueTask`1") == 0 || strcmp(className, "ValueTask") == 0) + { + result = NI_System_Threading_Tasks_Task_ConfigureAwait; + } + } + } } } else if (strcmp(namespaceName, "Internal.Runtime") == 0) diff --git a/src/coreclr/jit/inline.def b/src/coreclr/jit/inline.def index ba5e73626dce0e..6ce57ccfeed7ac 100644 --- a/src/coreclr/jit/inline.def +++ b/src/coreclr/jit/inline.def @@ -116,6 +116,7 @@ INLINE_OBSERVATION(UNSUPPORTED_OPCODE, bool, "unsupported opcode", INLINE_OBSERVATION(DEBUG_CODEGEN, bool, "debug codegen", FATAL, CALLER) INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoInlineRange", FATAL, CALLER) INLINE_OBSERVATION(USES_NEXT_CALL_RET_ADDR, bool, "uses NextCallReturnAddress 
intrinsic", FATAL, CALLER) +INLINE_OBSERVATION(ASYNC_USED_CONTINUATION, bool, "uses AsyncCallContinuation intrinsic", FATAL, CALLER) // ------ Caller Information ------- diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index c353f336f0ff4d..a44507fe9d7d9f 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -594,6 +594,8 @@ OPT_CONFIG_INTEGER(JitDoIfConversion, "JitDoIfConversion", 1) OPT_CONFIG_INTEGER(JitDoOptimizeMaskConversions, "JitDoOptimizeMaskConversions", 1) // Perform optimization of mask // conversions +RELEASE_CONFIG_INTEGER(JitOptimizeAwait, "JitOptimizeAwait", 1) // Perform optimization of Await intrinsics + RELEASE_CONFIG_INTEGER(JitEnableOptRepeat, "JitEnableOptRepeat", 1) // If zero, do not allow JitOptRepeat RELEASE_CONFIG_METHODSET(JitOptRepeat, "JitOptRepeat") // Runs optimizer multiple times on specified methods RELEASE_CONFIG_INTEGER(JitOptRepeatCount, "JitOptRepeatCount", 2) // Number of times to repeat opts when repeating diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index d3a37a2c4d40c2..c8f0e64f1f4a97 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -44,6 +44,7 @@ class JitFlags JIT_FLAG_SOFTFP_ABI = 30, // Enable armel calling convention #endif + JIT_FLAG_ASYNC = 31, // Generate code for use as an async function // Note: the mcs tool uses the currently unused upper flags bits when outputting SuperPMI MC file flags. // See EXTRA_JIT_FLAGS and spmidumphelper.cpp. Currently, these are bits 56 through 63. If they overlap, // something needs to change. 
@@ -141,6 +142,7 @@ class JitFlags FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_RELATIVE_CODE_RELOCS, JIT_FLAG_RELATIVE_CODE_RELOCS); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SOFTFP_ABI, JIT_FLAG_SOFTFP_ABI); #endif // TARGET_ARM + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_ASYNC, JIT_FLAG_ASYNC); #undef FLAGS_EQUAL } diff --git a/src/coreclr/jit/layout.cpp b/src/coreclr/jit/layout.cpp index 1160300ad6698c..a1ad460435caa7 100644 --- a/src/coreclr/jit/layout.cpp +++ b/src/coreclr/jit/layout.cpp @@ -567,6 +567,31 @@ ClassLayout* ClassLayout::Create(Compiler* compiler, const ClassLayoutBuilder& b return newLayout; } +//------------------------------------------------------------------------ +// HasGCByRef: Check if this class layout has a TYP_BYREF GC pointer in it. +// +// Return value: +// True if so. +// +bool ClassLayout::HasGCByRef() const +{ + if (!HasGCPtr()) + { + return false; + } + + unsigned numSlots = GetSlotCount(); + for (unsigned i = 0; i < numSlots; i++) + { + if (GetGCPtrType(i) == TYP_BYREF) + { + return true; + } + } + + return false; +} + //------------------------------------------------------------------------ // IsStackOnly: does the layout represent a block that can never be on the heap? 
// diff --git a/src/coreclr/jit/layout.h b/src/coreclr/jit/layout.h index 1f5918840057a4..ca367d5fb56307 100644 --- a/src/coreclr/jit/layout.h +++ b/src/coreclr/jit/layout.h @@ -224,6 +224,8 @@ class ClassLayout return m_gcPtrCount != 0; } + bool HasGCByRef() const; + bool IsStackOnly(Compiler* comp) const; bool IsGCPtr(unsigned slot) const diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 5625adf1cc9d1e..79aa47da44b318 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -36,19 +36,19 @@ void Compiler::lvaInitTypeRef() { /* x86 args look something like this: - [this ptr] [hidden return buffer] [declared arguments]* [generic context] [var arg cookie] + [this ptr] [hidden return buffer] [declared arguments]* [generic context] [async continuation] [var arg cookie] x64 is closer to the native ABI: - [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]* + [this ptr] [hidden return buffer] [generic context] [async continuation] [var arg cookie] [declared arguments]* (Note: prior to .NET Framework 4.5.1 for Windows 8.1 (but not .NET Framework 4.5.1 "downlevel"), the "hidden return buffer" came before the "this ptr". Now, the "this ptr" comes first. This is different from the C++ order, where the "hidden return buffer" always comes first.) 
ARM and ARM64 are the same as the current x64 convention: - [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]* + [this ptr] [hidden return buffer] [generic context] [async continuation] [var arg cookie] [declared arguments]* Key difference: - The var arg cookie and generic context are swapped with respect to the user arguments + The var arg cookie, generic context and async continuations are swapped with respect to the user arguments */ /* Set compArgsCount and compLocalsCount */ @@ -161,6 +161,11 @@ void Compiler::lvaInitTypeRef() info.compTypeCtxtArg = BAD_VAR_NUM; } + if (compIsAsync()) + { + info.compArgsCount++; + } + lvaCount = info.compLocalsCount = info.compArgsCount + info.compMethodInfo->locals.numArgs; info.compILlocalsCount = info.compILargsCount + info.compMethodInfo->locals.numArgs; @@ -371,6 +376,8 @@ void Compiler::lvaInitArgs(bool hasRetBuffArg) // and shared generic struct instance methods lvaInitGenericsCtxt(&varNum); + lvaInitAsyncContinuation(&varNum); + /* If the method is varargs, process the varargs cookie */ lvaInitVarArgsHandle(&varNum); #endif @@ -384,6 +391,8 @@ void Compiler::lvaInitArgs(bool hasRetBuffArg) // and shared generic struct instance methods lvaInitGenericsCtxt(&varNum); + lvaInitAsyncContinuation(&varNum); + /* If the method is varargs, process the varargs cookie */ lvaInitVarArgsHandle(&varNum); #endif @@ -676,6 +685,33 @@ void Compiler::lvaInitGenericsCtxt(unsigned* curVarNum) (*curVarNum)++; } +//----------------------------------------------------------------------------- +// lvaInitAsyncContinuation: +// Initialize the async continuation parameter. 
+// +// Arguments: +// curVarNum - [in, out] The current local variable number for parameters +// +void Compiler::lvaInitAsyncContinuation(unsigned* curVarNum) +{ + if (!compIsAsync()) + { + return; + } + + lvaAsyncContinuationArg = *curVarNum; + LclVarDsc* varDsc = lvaGetDesc(*curVarNum); + varDsc->lvType = TYP_REF; + varDsc->lvIsParam = true; + + // The final home for this incoming register might be our local stack frame + varDsc->lvOnFrame = true; + + INDEBUG(varDsc->lvReason = "Async continuation arg"); + + (*curVarNum)++; +} + /*****************************************************************************/ void Compiler::lvaInitVarArgsHandle(unsigned* curVarNum) { diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index a3271e832fa8de..99d011ea32e9bc 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -284,6 +284,20 @@ class LIR final void InsertAtBeginning(Range&& range); void InsertAtEnd(Range&& range); + template <typename... Trees> + void InsertAtBeginning(GenTree* tree, Trees&&... rest) + { + InsertAtBeginning(std::forward<Trees>(rest)...); + InsertAtBeginning(tree); + } + + template <typename... Trees> + void InsertAtEnd(GenTree* tree, Trees&&... 
rest) + { + InsertAtEnd(tree); + InsertAtEnd(std::forward<Trees>(rest)...); + } + void Remove(GenTree* node, bool markOperandsUnused = false); Range Remove(GenTree* firstNode, GenTree* lastNode); Range Remove(ReadOnlyRange&& range); diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index b199d9b6612509..f1c7c9a1d1ef9e 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -1454,6 +1454,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR case GT_JCC: case GT_JTRUE: case GT_RETURN: + case GT_RETURN_SUSPEND: case GT_SWITCH: case GT_RETFILT: case GT_START_NONGC: diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index ace86eb26177d3..61de442555b8fc 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -731,6 +731,13 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_MDARR_LOWER_BOUND: return LowerArrLength(node->AsArrCommon()); + case GT_ASYNC_CONTINUATION: + return LowerAsyncContinuation(node); + + case GT_RETURN_SUSPEND: + LowerReturnSuspend(node); + break; + default: break; } @@ -5432,6 +5439,83 @@ void Lowering::LowerRetSingleRegStructLclVar(GenTreeUnOp* ret) } } +//---------------------------------------------------------------------------------------------- +// LowerAsyncContinuation: Lower a GT_ASYNC_CONTINUATION node +// +// Arguments: +// asyncCont - Async continuation node +// +// Returns: +// Next node to lower. +// +GenTree* Lowering::LowerAsyncContinuation(GenTree* asyncCont) +{ + assert(asyncCont->OperIs(GT_ASYNC_CONTINUATION)); + + GenTree* next = asyncCont->gtNext; + + // + // ASYNC_CONTINUATION is created from two sources: + // + // 1. The async resumption stubs are IL stubs created by the VM. These call + // runtime async functions via "calli", passing the continuation manually. + // They use the AsyncHelpers.AsyncCallContinuation intrinsic after the + // calli, which turns into the ASYNC_CONTINUATION node during import. + // + // 2. 
In the async transformation, ASYNC_CONTINUATION nodes are inserted + // after calls to async calls. + // + // In the former case nothing has marked the previous call as an "async" + // method. We need to do that here to ensure that the backend knows that + // the call has a non-standard calling convention that returns an + // additional GC ref. This requires additional GC tracking that we would + // otherwise not get. + // + GenTree* node = asyncCont; + while (true) + { + node = node->gtPrev; + noway_assert((node != nullptr) && "Ran out of nodes while looking for call before async continuation"); + + if (node->IsCall()) + { + if (!node->AsCall()->IsAsync()) + { + JITDUMP("Marking the call [%06u] before async continuation [%06u] as an async call\n", + Compiler::dspTreeID(node), Compiler::dspTreeID(asyncCont)); + node->AsCall()->SetIsAsync(); + } + + BlockRange().Remove(asyncCont); + BlockRange().InsertAfter(node, asyncCont); + break; + } + } + + return next; +} + +//---------------------------------------------------------------------------------------------- +// LowerReturnSuspend: +// Lower a GT_RETURN_SUSPEND by making it a terminator node. +// +// Arguments: +// node - The node +// +void Lowering::LowerReturnSuspend(GenTree* node) +{ + assert(node->OperIs(GT_RETURN_SUSPEND)); + while (BlockRange().LastNode() != node) + { + BlockRange().Remove(BlockRange().LastNode(), true); + } + + if (comp->compMethodRequiresPInvokeFrame()) + { + InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(node)); + } +} + //---------------------------------------------------------------------------------------------- // LowerCallStruct: Lowers a call node that returns a struct. 
// diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index a9db26b3ee4312..fb8c06ba6f6930 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -185,6 +185,8 @@ class Lowering final : public Phase GenTree* LowerStoreLocCommon(GenTreeLclVarCommon* lclVar); void LowerRetStruct(GenTreeUnOp* ret); void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret); + GenTree* LowerAsyncContinuation(GenTree* asyncCont); + void LowerReturnSuspend(GenTree* retSuspend); void LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList); bool IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList); void LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 2dcdc5db3f79fa..2a547b11b25649 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1030,7 +1030,7 @@ class LinearScan : public LinearScanInterface // insert refpositions representing prolog zero-inits which will be added later void insertZeroInitRefPositions(); - void addKillForRegs(regMaskTP mask, LsraLocation currentLoc); + RefPosition* addKillForRegs(regMaskTP mask, LsraLocation currentLoc); void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); @@ -2014,6 +2014,7 @@ class LinearScan : public LinearScanInterface int BuildPutArgReg(GenTreeUnOp* node); int BuildCall(GenTreeCall* call); void MarkSwiftErrorBusyForCall(GenTreeCall* call); + void MarkAsyncContinuationBusyForCall(GenTreeCall* call); int BuildCmp(GenTree* tree); int BuildCmpOperands(GenTree* tree); int BuildBlockStore(GenTreeBlk* blkNode); diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 815f0149aede11..8b7e2a9e086dcd 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -631,6 +631,12 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + assert(dstCount == 1); + 
BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_COPY: srcCount = 1; #ifdef TARGET_ARM @@ -693,6 +699,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_JCC: case GT_SETCC: case GT_MEMORYBARRIER: + case GT_RETURN_SUSPEND: srcCount = BuildSimple(tree); break; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index e79748f8555673..7baaef17ac484d 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1320,6 +1320,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index b3c6c7d4cf788d..13fc2bd1ede079 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -274,6 +274,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b6b01ca38e63f8..28c2bea643bff2 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -686,7 +686,7 @@ bool LinearScan::isContainableMemoryOp(GenTree* node) // mask - the mask (set) of registers. // currentLoc - the location at which they should be added // -void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) +RefPosition* LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) { // The mask identifies a set of registers that will be used during // codegen. 
Mark these as modified here, so when we do final frame @@ -705,6 +705,8 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) *killTail = pos; killTail = &pos->nextRefPosition; + + return pos; } //------------------------------------------------------------------------ @@ -4741,3 +4743,24 @@ void LinearScan::MarkSwiftErrorBusyForCall(GenTreeCall* call) setDelayFree(swiftErrorRegRecord->lastRefPosition); } #endif + +//------------------------------------------------------------------------ +// MarkAsyncContinuationBusyForCall: +// Add a ref position that marks the async continuation register as busy +// until it is killed. +// +// Arguments: +// call - The call node +// +void LinearScan::MarkAsyncContinuationBusyForCall(GenTreeCall* call) +{ + // We model the async continuation like the swift error register: we ensure + // the node follows the call in lowering, and make it delay freed to ensure + // nothing is allocated into the register between the call and + // ASYNC_CONTINUATION node. We need to add a kill here in the right spot as + // not all targets may naturally have one created. + assert(call->gtNext != nullptr); + assert(call->gtNext->OperIs(GT_ASYNC_CONTINUATION)); + RefPosition* refPos = addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc + 1); + setDelayFree(refPos); +} diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 529e6d8127b670..b1f699c95ce734 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -563,6 +563,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -782,6 +787,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. 
+ if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 4476bfb09af9cc..37f3bd2e59087f 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -767,6 +767,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -1008,6 +1013,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 6f92d25d2b23ed..2a7e39be76e9ee 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -627,6 +627,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; + case GT_ASYNC_CONTINUATION: + srcCount = 0; + BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); + break; + #if defined(FEATURE_EH_WINDOWS_X86) case GT_END_LFIN: srcCount = 0; @@ -1358,6 +1363,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. 
+ if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f18e5935f0de4b..eca38011738fe3 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -709,6 +709,8 @@ const char* getWellKnownArgName(WellKnownArg arg) return "VarArgsCookie"; case WellKnownArg::InstParam: return "InstParam"; + case WellKnownArg::AsyncContinuation: + return "AsyncContinuation"; case WellKnownArg::RetBuffer: return "RetBuffer"; case WellKnownArg::PInvokeFrame: @@ -4510,6 +4512,12 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } #endif + if (compIsAsync() != call->IsAsync()) + { + failTailCall("Caller and callee do not agree on async-ness"); + return nullptr; + } + // We have to ensure to pass the incoming retValBuf as the // outgoing one. Using a temp will not do as this function will // not regain control to do the copy. 
This can happen when inlining diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index 69b6c75c4a963d..8804cf515d2aa7 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -105,6 +105,7 @@ enum NamedIntrinsic : unsigned short NI_System_RuntimeType_get_TypeHandle, NI_System_StubHelpers_GetStubContext, NI_System_StubHelpers_NextCallReturnAddress, + NI_System_StubHelpers_AsyncCallContinuation, NI_Array_Address, NI_Array_Get, @@ -120,6 +121,8 @@ enum NamedIntrinsic : unsigned short NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant, NI_System_Runtime_CompilerServices_RuntimeHelpers_IsReferenceOrContainsReferences, NI_System_Runtime_CompilerServices_RuntimeHelpers_GetMethodTable, + NI_System_Runtime_CompilerServices_RuntimeHelpers_Await, + NI_System_Runtime_CompilerServices_RuntimeHelpers_AsyncSuspend, NI_System_Runtime_CompilerServices_StaticsHelpers_VolatileReadAsByref, @@ -152,6 +155,8 @@ enum NamedIntrinsic : unsigned short NI_System_Threading_Interlocked_ExchangeAdd, NI_System_Threading_Interlocked_MemoryBarrier, + NI_System_Threading_Tasks_Task_ConfigureAwait, + // These two are special marker IDs so that we still get the inlining profitability boost NI_System_Numerics_Intrinsic, NI_System_Runtime_Intrinsics_Intrinsic, diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 9afb109802e85f..711e1d91e9a109 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -999,6 +999,11 @@ void Compiler::optValnumCSE_InitDataFlow() } } + if (compIsAsync()) + { + optValnumCSE_SetUpAsyncByrefKills(); + } + for (BasicBlock* const block : Blocks()) { // If the block doesn't contains a call then skip it... 
@@ -1082,6 +1087,112 @@ void Compiler::optValnumCSE_InitDataFlow() #endif // DEBUG } +//--------------------------------------------------------------------------- +// optValnumCSE_SetUpAsyncByrefKills: +// Compute kills because of async calls requiring byrefs not to be live +// across them. +// +void Compiler::optValnumCSE_SetUpAsyncByrefKills() +{ + bool anyAsyncKills = false; + cseAsyncKillsMask = BitVecOps::MakeFull(cseLivenessTraits); + for (unsigned inx = 1; inx <= optCSECandidateCount; inx++) + { + CSEdsc* dsc = optCSEtab[inx - 1]; + assert(dsc->csdIndex == inx); + bool isByRef = false; + if (dsc->csdTree->TypeIs(TYP_BYREF)) + { + isByRef = true; + } + else if (dsc->csdTree->TypeIs(TYP_STRUCT)) + { + ClassLayout* layout = dsc->csdTree->GetLayout(this); + isByRef = layout->HasGCByRef(); + } + + if (isByRef) + { + // We generate a bit pattern like: 1111111100111100 where there + // are 0s only for the byref CSEs. + BitVecOps::RemoveElemD(cseLivenessTraits, cseAsyncKillsMask, getCSEAvailBit(inx)); + BitVecOps::RemoveElemD(cseLivenessTraits, cseAsyncKillsMask, getCSEAvailCrossCallBit(inx)); + anyAsyncKills = true; + } + } + + if (!anyAsyncKills) + { + return; + } + + for (BasicBlock* block : Blocks()) + { + Statement* asyncCallStmt = nullptr; + GenTree* asyncCall = nullptr; + // Find last async call in block + Statement* stmt = block->lastStmt(); + if (stmt == nullptr) + { + continue; + } + + while (asyncCall == nullptr) + { + if ((stmt->GetRootNode()->gtFlags & GTF_CALL) != 0) + { + for (GenTree* tree = stmt->GetRootNode(); tree != nullptr; tree = tree->gtPrev) + { + if (tree->IsCall() && tree->AsCall()->IsAsync()) + { + asyncCallStmt = stmt; + asyncCall = tree; + break; + } + } + } + + if (stmt == block->firstStmt()) + break; + + stmt = stmt->GetPrevStmt(); + } + + if (asyncCall == nullptr) + { + continue; + } + + // This block has a suspension point. Make all BYREF CSEs unavailable. 
+ BitVecOps::IntersectionD(cseLivenessTraits, block->bbCseGen, cseAsyncKillsMask); + BitVecOps::IntersectionD(cseLivenessTraits, block->bbCseOut, cseAsyncKillsMask); + + // Now make all byref CSEs after the suspension point available. + Statement* curStmt = asyncCallStmt; + GenTree* curTree = asyncCall; + while (true) + { + do + { + if (IS_CSE_INDEX(curTree->gtCSEnum)) + { + unsigned CSEnum = GET_CSE_INDEX(curTree->gtCSEnum); + BitVecOps::AddElemD(cseLivenessTraits, block->bbCseGen, getCSEAvailBit(CSEnum)); + BitVecOps::AddElemD(cseLivenessTraits, block->bbCseOut, getCSEAvailBit(CSEnum)); + } + + curTree = curTree->gtNext; + } while (curTree != nullptr); + + curStmt = curStmt->GetNextStmt(); + if (curStmt == nullptr) + break; + + curTree = curStmt->GetTreeList(); + } + } +} + /***************************************************************************** * * CSE Dataflow, so that all helper methods for dataflow are in a single place @@ -1577,7 +1688,7 @@ void Compiler::optValnumCSE_Availability() // kill all of the cseAvailCrossCallBit for each CSE whenever we see a GT_CALL (unless the call // generates a CSE). // - if (tree->OperGet() == GT_CALL) + if (tree->OperIs(GT_CALL)) { // Check for the common case of an already empty available_cses set // and thus nothing needs to be killed @@ -1595,6 +1706,12 @@ void Compiler::optValnumCSE_Availability() // BitVecOps::IntersectionD(cseLivenessTraits, available_cses, cseCallKillsMask); + // In async state machines, make all byref CSEs unavailable after suspension points. 
+ if (tree->AsCall()->IsAsync() && compIsAsync()) + { + BitVecOps::IntersectionD(cseLivenessTraits, available_cses, cseAsyncKillsMask); + } + if (isDef) { // We can have a GT_CALL that produces a CSE, diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 9e180de420fabc..2b5f3d4b84281e 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -540,6 +540,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_RAX + #define REG_ASYNC_CONTINUATION_RET REG_RCX + #define RBM_ASYNC_CONTINUATION_RET RBM_RCX + // What sort of reloc do we use for [disp32] address mode #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_REL32 diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h index 95cb19a2291a49..cd3d29fafef33d 100644 --- a/src/coreclr/jit/targetarm.h +++ b/src/coreclr/jit/targetarm.h @@ -247,6 +247,9 @@ #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R0 + #define REG_ASYNC_CONTINUATION_RET REG_R2 + #define RBM_ASYNC_CONTINUATION_RET RBM_R2 + #define REG_FPBASE REG_R11 #define RBM_FPBASE RBM_R11 #define STR_FPBASE "r11" diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 3e1dec49b4778a..678a05e181e40d 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -263,6 +263,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_R9 + #define REG_ASYNC_CONTINUATION_RET REG_R2 + #define RBM_ASYNC_CONTINUATION_RET RBM_R2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 452778c31963a0..d691f4c8fd1ec2 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -246,6 +246,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + 
#define REG_ASYNC_CONTINUATION_RET REG_A2 + #define RBM_ASYNC_CONTINUATION_RET RBM_A2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetriscv64.h b/src/coreclr/jit/targetriscv64.h index e5dcded3d878f5..ee6c6d22260c7c 100644 --- a/src/coreclr/jit/targetriscv64.h +++ b/src/coreclr/jit/targetriscv64.h @@ -222,6 +222,9 @@ #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + #define REG_ASYNC_CONTINUATION_RET REG_A2 + #define RBM_ASYNC_CONTINUATION_RET RBM_A2 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index dd63766d631ac7..e630a1ae842120 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -294,6 +294,9 @@ #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~RBM_ECX) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_ECX + #define REG_ASYNC_CONTINUATION_RET REG_ECX + #define RBM_ASYNC_CONTINUATION_RET RBM_ECX + #define REG_FPBASE REG_EBP #define RBM_FPBASE RBM_EBP #define STR_FPBASE "ebp" diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 30a66726279ca2..2730f930245767 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -10515,8 +10515,8 @@ static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memo GT_NOP, // These control-flow operations need no values. - GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE, - GT_SWIFT_ERROR_RET}; + GT_JTRUE, GT_RETURN, GT_RETURN_SUSPEND, GT_SWITCH, GT_RETFILT, + GT_CKFINITE, GT_SWIFT_ERROR_RET}; void ValueNumStore::ValidateValueNumStoreStatics() { @@ -12401,9 +12401,9 @@ void Compiler::fgValueNumberTree(GenTree* tree) break; case GT_CATCH_ARG: + case GT_ASYNC_CONTINUATION: case GT_SWIFT_ERROR: - // We know nothing about the value of a caught expression. 
- // We also know nothing about the error register's value post-Swift call. + // We know nothing about the value of these. tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); break; @@ -12773,6 +12773,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) case GT_SWITCH: case GT_RETURN: case GT_RETFILT: + case GT_RETURN_SUSPEND: case GT_NULLCHECK: if (tree->gtGetOp1() != nullptr) {