diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs index 584f6cd3e15965..dfa8f59429d969 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs @@ -328,6 +328,50 @@ public static void EnsureSufficientExecutionStack() [MethodImpl(MethodImplOptions.InternalCall)] public static extern bool TryEnsureSufficientExecutionStack(); + internal static unsafe bool CanStackAllocate(nuint size, nuint* allocatedInFrame) + { + nuint stackBase; + nuint stackLimit; + GetStackBounds(&stackBase, &stackLimit); + + nuint currentStackAddress = (nuint)(&stackBase); + if ((stackLimit == 0) || + (stackBase <= stackLimit) || + (currentStackAddress <= stackLimit) || + (currentStackAddress >= stackBase)) + { + // Unknown or unexpected stack bounds: use the heap. + return false; + } + + nuint stackSize = stackBase - stackLimit; + nuint stackUsed = stackBase - currentStackAddress; + if (stackUsed >= (stackSize >> 1)) + { + // The stack is already at least half consumed. + return false; + } + + nuint remainingStack = currentStackAddress - stackLimit; + // 1/128th of remaining stack; about 8 KB when 1 MB remains. + nuint stackAllocationLimit = remainingStack >> 7; + nuint newAllocatedInFrame = *allocatedInFrame + size; + if ((newAllocatedInFrame < *allocatedInFrame) || + (size > stackAllocationLimit) || + (newAllocatedInFrame > stackAllocationLimit)) + { + // Overflow, request too large, or this frame has used too much stack. 
+ return false; + } + + *allocatedInFrame = newAllocatedInFrame; + + return true; + } + + [MethodImpl(MethodImplOptions.InternalCall)] + private static extern unsafe void GetStackBounds(nuint* stackBase, nuint* stackLimit); + public static object GetUninitializedObject( // This API doesn't call any constructors, but the type needs to be seen as constructed. // A type is seen as constructed if a constructor is kept. diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 4dfa9d39bc10c7..a5953f2a2812c4 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -485,6 +485,7 @@ enum CorInfoHelpFunc CORINFO_HELP_TAILCALL, // Perform a tail call CORINFO_HELP_GETCURRENTMANAGEDTHREADID, + CORINFO_HELP_CAN_STACK_ALLOCATE, // decide whether a conditional localloc is safe CORINFO_HELP_INIT_PINVOKE_FRAME, // initialize an inlined PInvoke Frame for the JIT-compiler diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index e0177d76cd5826..840530e1b9ea49 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* fcb1b400-696c-4425-a8a7-bb082430a217 */ - 0xfcb1b400, - 0x696c, - 0x4425, - {0xa8, 0xa7, 0xbb, 0x08, 0x24, 0x30, 0xa2, 0x17} +constexpr GUID JITEEVersionIdentifier = { /* f967451e-74d1-477d-8470-776ff4dc31eb */ + 0xf967451e, + 0x74d1, + 0x477d, + {0x84, 0x70, 0x77, 0x6f, 0xf4, 0xdc, 0x31, 0xeb} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 7742945b23e7e4..a6b6a655e39205 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -225,6 +225,7 @@ #endif DYNAMICJITHELPER(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, NULL, METHOD__ENVIRONMENT__CURRENT_MANAGED_THREAD_ID) + DYNAMICJITHELPER(CORINFO_HELP_CAN_STACK_ALLOCATE, NULL, METHOD__RUNTIME_HELPERS__CAN_STACK_ALLOCATE) JITHELPER(CORINFO_HELP_INIT_PINVOKE_FRAME, 
JIT_InitPInvokeFrame, METHOD__NIL) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 2bae2090b07035..2e7f158284df17 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -391,6 +391,8 @@ void CodeGen::genLclHeap(GenTree* tree) GenTree* size = tree->AsOp()->gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); + bool const initMem = m_compiler->gtMustZeroLocalloc(tree); + // Result of localloc will be returned in regCnt. // Also it used as temporary register in code generation // for storing allocation size @@ -476,7 +478,7 @@ void CodeGen::genLclHeap(GenTree* tree) goto ALLOC_DONE; } - else if (!m_compiler->info.compInitMem && (amount < m_compiler->eeGetPageSize())) // must be < not <= + else if (!initMem && (amount < m_compiler->eeGetPageSize())) // must be < not <= { // Since the size is less than a page, simply adjust the SP value. // The SP might already be in the guard page, must touch it BEFORE @@ -500,7 +502,7 @@ void CodeGen::genLclHeap(GenTree* tree) } // Allocation - if (m_compiler->info.compInitMem) + if (initMem) { // At this point 'regCnt' is set to the total number of bytes to localloc. // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index f5ac76f6c50067..0d6477aa419bf5 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3105,7 +3105,7 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack - bool needsZeroing = m_compiler->info.compInitMem; + bool initMem = m_compiler->gtMustZeroLocalloc(tree); // compute the amount of memory to allocate to properly STACK_ALIGN. 
size_t amount = 0; @@ -3113,7 +3113,10 @@ void CodeGen::genLclHeap(GenTree* tree) { // The size node being a contained constant means that Lower has taken care of // zeroing the memory if compInitMem is true. - needsZeroing = false; + if (m_compiler->info.compInitMem) + { + initMem = false; + } // If amount is zero then return null in targetReg amount = size->AsIntCon()->IconValue(); @@ -3137,7 +3140,7 @@ void CodeGen::genLclHeap(GenTree* tree) // Compute the size of the block to allocate and perform alignment. // If compInitMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. - if (needsZeroing) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -3224,10 +3227,10 @@ void CodeGen::genLclHeap(GenTree* tree) } // else, "mov regCnt, amount" - // If compInitMem=true, we can reuse targetReg as regcnt. + // If initMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. assert(regCnt == REG_NA); - if (needsZeroing) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -3239,7 +3242,7 @@ void CodeGen::genLclHeap(GenTree* tree) instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount); } - if (needsZeroing) + if (initMem) { BasicBlock* loop = genCreateTempLabel(); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index aa84d692dbe7ed..ea826e491d5b94 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1455,6 +1455,8 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack + bool const initMem = m_compiler->gtMustZeroLocalloc(tree); + // compute the amount of memory to allocate to properly STACK_ALIGN. 
size_t amount = 0; if (size->IsCnsIntOrI()) @@ -1481,9 +1483,9 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0); // Compute the size of the block to allocate and perform alignment. - // If compInitMem=true, we can reuse targetReg as regcnt, + // If initMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. - if (m_compiler->info.compInitMem) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -1535,7 +1537,7 @@ void CodeGen::genLclHeap(GenTree* tree) static_assert(STACK_ALIGN == (REGSIZE_BYTES * 2)); assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time size_t stpCount = amount / (REGSIZE_BYTES * 2); - if (m_compiler->info.compInitMem) + if (initMem) { if (stpCount <= 4) { @@ -1582,10 +1584,10 @@ void CodeGen::genLclHeap(GenTree* tree) } // else, "mov regCnt, amount" - // If compInitMem=true, we can reuse targetReg as regcnt. + // If initMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. assert(regCnt == REG_NA); - if (m_compiler->info.compInitMem) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -1597,7 +1599,7 @@ void CodeGen::genLclHeap(GenTree* tree) instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount); } - if (m_compiler->info.compInitMem) + if (initMem) { // At this point 'regCnt' is set to the total number of bytes to locAlloc. 
// Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 0f3c8c5e912001..18b35706f84d77 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1446,6 +1446,7 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack + bool const initMem = m_compiler->gtMustZeroLocalloc(tree); const target_size_t pageSize = m_compiler->eeGetPageSize(); // According to RISC-V Privileged ISA page size is 4KiB @@ -1477,9 +1478,9 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0); // Compute the size of the block to allocate and perform alignment. - // If compInitMem=true, we can reuse targetReg as regcnt, + // If initMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. - if (m_compiler->info.compInitMem) + if (initMem) { regCnt = targetReg; } @@ -1531,7 +1532,7 @@ void CodeGen::genLclHeap(GenTree* tree) static_assert(STACK_ALIGN == (REGSIZE_BYTES * 2)); assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time size_t stpCount = amount / (REGSIZE_BYTES * 2); - if (m_compiler->info.compInitMem) + if (initMem) { if (stpCount <= 4) { @@ -1580,10 +1581,10 @@ void CodeGen::genLclHeap(GenTree* tree) } // else, "mov regCnt, amount" - // If compInitMem=true, we can reuse targetReg as regcnt. + // If initMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. assert(regCnt == REG_NA); - if (m_compiler->info.compInitMem) + if (initMem) { regCnt = targetReg; } @@ -1594,7 +1595,7 @@ void CodeGen::genLclHeap(GenTree* tree) instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? 
EA_4BYTE : EA_8BYTE, regCnt, amount); } - if (m_compiler->info.compInitMem) + if (initMem) { // At this point 'regCnt' is set to the total number of bytes to locAlloc. // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 9761d6f09edc04..6d8ae1353d1f55 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -3237,7 +3237,7 @@ void CodeGen::genLclHeap(GenTree* tree) assert(m_compiler->compLocallocUsed); assert(isFramePointerUsed()); - bool const needsZeroing = m_compiler->info.compInitMem; + bool const needsZeroing = m_compiler->gtMustZeroLocalloc(tree); GenTree* const size = tree->AsOp()->gtOp1; // We reserve this amount of space below any allocation for diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index a799120b4cb3ae..733a024affe255 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2798,6 +2798,8 @@ void CodeGen::genLclHeap(GenTree* tree) target_size_t stackAdjustment = 0; target_size_t locAllocStackOffset = 0; + bool const initMem = m_compiler->gtMustZeroLocalloc(tree); + // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; if (size->IsCnsIntOrI() && size->isContained()) @@ -2821,7 +2823,7 @@ void CodeGen::genLclHeap(GenTree* tree) // Compute the size of the block to allocate and perform alignment. // If compInitMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. - if (m_compiler->info.compInitMem) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -2846,7 +2848,7 @@ void CodeGen::genLclHeap(GenTree* tree) inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type)); - if (m_compiler->info.compInitMem) + if (initMem) { // Convert the count from a count of bytes to a loop count. 
We will loop once per // stack alignment size, so each loop will zero 4 bytes on Windows/x86, and 16 bytes @@ -2867,7 +2869,7 @@ void CodeGen::genLclHeap(GenTree* tree) } bool initMemOrLargeAlloc; // Declaration must be separate from initialization to avoid clang compiler error. - initMemOrLargeAlloc = m_compiler->info.compInitMem || (amount >= m_compiler->eeGetPageSize()); // must be >= not > + initMemOrLargeAlloc = initMem || (amount >= m_compiler->eeGetPageSize()); // must be >= not > #if FEATURE_FIXED_OUT_ARGS // If we have an outgoing arg area then we must adjust the SP by popping off the @@ -2941,7 +2943,7 @@ void CodeGen::genLclHeap(GenTree* tree) // We should not have any temp registers at this point. assert(internalRegisters.Count(tree) == 0); - if (m_compiler->info.compInitMem) + if (initMem) { // At this point 'regCnt' is set to the number of loop iterations for this loop, if each // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e9937b323bf992..4e1e52805eaf84 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3956,6 +3956,15 @@ class Compiler bool gtTreeHasLocalRead(GenTree* tree, unsigned lclNum); bool gtTreeHasLocalStore(GenTree* tree, unsigned lclNum); + // Returns true iff the LCLHEAP node "tree" must zero-initialize its + // allocation, either because the method requests init-mem semantics or + // because the node carries the GTF_LCLHEAP_MUSTINIT flag. + bool gtMustZeroLocalloc(GenTree* tree) + { + assert(tree->OperIs(GT_LCLHEAP)); + return info.compInitMem || ((tree->gtFlags & GTF_LCLHEAP_MUSTINIT) != 0); + } + void gtSetStmtInfo(Statement* stmt); // Returns "true" iff "node" has any of the side effects in "flags". 
@@ -6634,7 +6643,10 @@ class Compiler bool fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); PhaseStatus fgExpandStackArrayAllocations(); - bool fgExpandStackArrayAllocation(BasicBlock* pBlock, Statement* stmt, GenTreeCall* call); + bool fgExpandStackArrayAllocation(BasicBlock* pBlock, + Statement* stmt, + GenTreeCall* call, + unsigned& frameRunningTotalLclNum); PhaseStatus fgVNBasedIntrinsicExpansion(); bool fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 81d50ebbc9cc94..0c5691c5fd632a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -14585,6 +14585,8 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg) return "tail call"; case WellKnownArg::StackArrayLocal: return "&lcl arr"; + case WellKnownArg::StackArrayElemSize: + return "arr elemsz"; case WellKnownArg::RuntimeMethodHandle: return "meth hnd"; case WellKnownArg::AsyncExecutionContext: diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index b66874618c106b..9b070a63b0eb9e 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -538,6 +538,8 @@ enum GenTreeFlags : unsigned GTF_ALLOCOBJ_EMPTY_STATIC = 0x80000000, // GT_ALLOCOBJ -- allocation site is part of an empty static pattern + GTF_LCLHEAP_MUSTINIT = 0x80000000, // GT_LCLHEAP -- allocation must be zeroed + #ifdef FEATURE_HW_INTRINSICS GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask GTF_HW_USER_CALL = 0x20000000, // GT_HWINTRINSIC -- node is implemented via a user call @@ -4813,6 +4815,7 @@ enum class WellKnownArg : unsigned SwiftSelf, X86TailCallSpecialArg, StackArrayLocal, + StackArrayElemSize, RuntimeMethodHandle, AsyncExecutionContext, AsyncSynchronizationContext, diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 
f917dc1d441dc2..79f2f7039a59ab 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2801,8 +2801,15 @@ PhaseStatus Compiler::fgExpandStackArrayAllocations() // bool modified = false; + // Lazily-allocated TYP_I_IMPL local that accumulates the per-invocation + // total bytes of conditional (localloc) stack allocations. Initialized + // on first use by fgExpandStackArrayAllocation. + // + unsigned frameRunningTotalLclNum = BAD_VAR_NUM; + for (BasicBlock* const block : Blocks()) { + bool blockModified = false; for (Statement* const stmt : block->Statements()) { if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0) @@ -2817,15 +2824,28 @@ PhaseStatus Compiler::fgExpandStackArrayAllocations() continue; } - if (fgExpandStackArrayAllocation(block, stmt, tree->AsCall())) + GenTreeCall* const call = tree->AsCall(); + const bool isLocAlloc = call->gtArgs.FindWellKnownArg(WellKnownArg::StackArrayElemSize) != nullptr; + if (fgExpandStackArrayAllocation(block, stmt, call, frameRunningTotalLclNum)) { // If we expand, we split the statement's tree - // so will be done with this statment. + // so will be done with this statement. Localloc expansion + // also moves the statement to a new block, so the current + // block's statement iterator is no longer valid. // modified = true; + if (isLocAlloc) + { + blockModified = true; + } break; } } + + if (blockModified) + { + break; + } } } @@ -2846,7 +2866,19 @@ PhaseStatus Compiler::fgExpandStackArrayAllocations() // Returns: // true if a runtime lookup was found and expanded. // -bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, GenTreeCall* call) +// Remarks: +// For arrays whose size was large or not known during stack allocation analysis, +// the allocation expands into runtime checks followed by localloc (if small) +// or heapalloc (if big). 
+// +// For known sized arrays that do not require runtime dispatch, we assume +// upstream analysis has limited size to something reasonable, and the +// allocation is into fixed local storage. +// +bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, + Statement* stmt, + GenTreeCall* call, + unsigned& frameRunningTotalLclNum) { if (!call->IsHelperCall()) { @@ -2871,17 +2903,29 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, return false; } - // If this is a local array, the new helper will have an arg for the array's address + // If this is a local array, the new helper will have an arg for the array's address or an arg + // for the array element size // CallArg* const stackLocalAddressArg = call->gtArgs.FindWellKnownArg(WellKnownArg::StackArrayLocal); + CallArg* const elemSizeArg = call->gtArgs.FindWellKnownArg(WellKnownArg::StackArrayElemSize); - if (stackLocalAddressArg == nullptr) + if ((stackLocalAddressArg == nullptr) && (elemSizeArg == nullptr)) { return false; } - JITDUMP("Expanding new array helper for stack allocated array at [%06d] in " FMT_BB ":\n", dspTreeID(call), - block->bbNum); + // If we have an elem size arg, this is intended to be a localloc/heapalloc + // + // Note we may have figured out the array length after we did the + // escape analysis (that is, lengthArg might be a constant), so we + // could possibly change this from a localloc to a fixed alloc, + // if we could show that was sound. + // + bool const isLocAlloc = (elemSizeArg != nullptr); + bool const isAlign8 = isLocAlloc && (helper == CORINFO_HELP_NEWARR_1_ALIGN8); + + JITDUMP("Expanding new array helper for stack allocated array at [%06d] %sin " FMT_BB ":\n", dspTreeID(call), + isLocAlloc ? 
" into localloc " : "", block->bbNum); DISPTREE(call); JITDUMP("\n"); @@ -2898,30 +2942,360 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, } } - GenTree* const stackLocalAddress = stackLocalAddressArg->GetNode(); + unsigned resultLclNum = BAD_VAR_NUM; + if (isLocAlloc) + { + GenTree* const stmtRoot = stmt->GetRootNode(); + if (stmtRoot->OperIs(GT_STORE_LCL_VAR) && (stmtRoot->AsLclVarCommon()->Data() == *callUse)) + { + resultLclNum = stmtRoot->AsLclVarCommon()->GetLclNum(); + } + else + { + resultLclNum = lvaGrabTemp(true DEBUGARG("stack array result")); + lvaTable[resultLclNum].lvType = genActualType(call); + + GenTree* const resultStore = gtNewStoreLclVarNode(resultLclNum, call); + Statement* const resultStoreStmt = fgNewStmtFromTree(resultStore); + gtUpdateStmtSideEffects(resultStoreStmt); + fgInsertStmtBefore(block, stmt, resultStoreStmt); + + *callUse = gtNewLclVarNode(resultLclNum); + gtSetStmtInfo(stmt); + fgSetStmtSeq(stmt); + gtUpdateStmtSideEffects(stmt); + + stmt = resultStoreStmt; + callUse = &stmt->GetRootNode()->AsLclVarCommon()->Data(); + } + } + + GenTree* lengthArg = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode(); + GenTree* stackLocalAddress = nullptr; + + // Temps holding the once-evaluated length and method-table args for the + // localloc path. Used by both the dispatch path and the header init, + // so declared at function scope. + // + unsigned lengthTemp = BAD_VAR_NUM; + unsigned typeTemp = BAD_VAR_NUM; + + // If we have a localloc, compute (at runtime) overall size, and check length + // against a threshold. If over, heap allocate. 
+ // + if (isLocAlloc) + { + assert(elemSizeArg != nullptr); + assert(stackLocalAddressArg == nullptr); + GenTree* const elemSize = elemSizeArg->GetNode(); + assert(elemSize->IsCnsIntOrI()); + + // Spill the length and method-table args to fresh temps so all + // downstream consumers (size compute, runtime check, header + // init, heap-fallback call) reference a temp use instead of + // cloning the original (possibly side-effecting / non-clonable) + // expressions. Replace the call's arg slots with a temp use so + // the original expressions live in exactly one place. + // + { + GenTree*& lengthArgRef = call->gtArgs.GetArgByIndex(lengthArgIndex)->NodeRef(); + GenTree* const origLength = lengthArgRef; + lengthTemp = lvaGrabTemp(true DEBUGARG("stack array length")); + lvaTable[lengthTemp].lvType = genActualType(origLength); + + GenTree* const lengthSpill = gtNewStoreLclVarNode(lengthTemp, origLength); + Statement* const lengthSpillStmt = fgNewStmtFromTree(lengthSpill); + gtUpdateStmtSideEffects(lengthSpillStmt); + fgInsertStmtBefore(block, stmt, lengthSpillStmt); + + lengthArgRef = gtNewLclVarNode(lengthTemp); + lengthArg = lengthArgRef; + } + { + GenTree*& typeArgRef = call->gtArgs.GetArgByIndex(typeArgIndex)->NodeRef(); + GenTree* const origType = typeArgRef; + typeTemp = lvaGrabTemp(true DEBUGARG("stack array method table")); + lvaTable[typeTemp].lvType = genActualType(origType); + + GenTree* const typeSpill = gtNewStoreLclVarNode(typeTemp, origType); + Statement* const typeSpillStmt = fgNewStmtFromTree(typeSpill); + gtUpdateStmtSideEffects(typeSpillStmt); + fgInsertStmtBefore(block, stmt, typeSpillStmt); + + typeArgRef = gtNewLclVarNode(typeTemp); + } + + unsigned const locallocTemp = lvaGrabTemp(true DEBUGARG("localloc stack address")); + lvaTable[locallocTemp].lvType = TYP_I_IMPL; + + GenTree* const arrayLength = gtNewLclVarNode(lengthTemp); + GenTree* const nativeLength = fgOptimizeCast(gtNewCastNode(TYP_I_IMPL, arrayLength, false, TYP_I_IMPL)); + GenTree* 
const baseSize = gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL); + GenTree* const payloadSize = gtNewOperNode(GT_MUL, TYP_I_IMPL, elemSize, nativeLength); + GenTree* totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseSize, payloadSize); + + unsigned const elemSizeValue = (unsigned)elemSize->AsIntCon()->IconValue(); + + if ((elemSizeValue % TARGET_POINTER_SIZE) != 0) + { + // Round size up to TARGET_POINTER_SIZE. + // size = (size + TPS - 1) & ~(TPS - 1) + // + GenTree* const roundSize = gtNewIconNode(TARGET_POINTER_SIZE - 1, TYP_I_IMPL); + GenTree* const biasedSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, totalSize, roundSize); + GenTree* const mask = gtNewIconNode(TARGET_POINTER_SIZE - 1, TYP_I_IMPL); + GenTree* const invMask = gtNewOperNode(GT_NOT, TYP_I_IMPL, mask); + GenTree* const paddedSize = gtNewOperNode(GT_AND, TYP_I_IMPL, biasedSize, invMask); + + totalSize = paddedSize; + } + +#ifndef TARGET_64BIT + if (isAlign8) + { + // For Align8, allocate an extra TARGET_POINTER_SIZE (4) bytes so + // we can fix alignment below. + // + GenTree* const alignSize = gtNewIconNode(4, TYP_I_IMPL); + totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, totalSize, alignSize); + } +#endif + + // We will need total size twice, so spill it to a local + // + unsigned const totalSizeTemp = lvaGrabTemp(false DEBUGARG("lcl/heap alloc size")); + lvaTable[totalSizeTemp].lvType = TYP_I_IMPL; + GenTree* const totalSizeStore = gtNewStoreLclVarNode(totalSizeTemp, totalSize); + + Statement* const totalSizeStmt = fgNewStmtFromTree(totalSizeStore); + gtUpdateStmtSideEffects(totalSizeStmt); + fgInsertStmtBefore(block, stmt, totalSizeStmt); + + // Check the length against a JIT-time-precomputed safe upper bound using + // an unsigned compare so that negative lengths are routed to the + // heap-fallback helper. The helper validates length and raises the + // appropriate exception. 
This is not the stack/heap policy; it only + // ensures the size expression passed to the policy helper did not wrap. + // On 32-bit targets the bound is UINT32_MAX, never the host's SIZE_MAX. + size_t const baseBytes = (size_t)OFFSETOF__CORINFO_Array__data; +#ifndef TARGET_64BIT + size_t const align8Pad = isAlign8 ? 4 : 0; +#else + size_t const align8Pad = 0; +#endif + size_t maxSafeLength = CORINFO_Array_MaxLength; +#ifndef TARGET_64BIT + if ((size_t)UINT32_MAX > baseBytes + align8Pad) + { + assert(elemSizeValue > 0); + maxSafeLength = min(maxSafeLength, ((size_t)UINT32_MAX - baseBytes - align8Pad) / elemSizeValue); + // The pointer-size round-up above can add up to (TPS - 1) bytes; + // trim one element to absorb that slack. + if (((elemSizeValue % TARGET_POINTER_SIZE) != 0) && (maxSafeLength > 0)) + { + maxSafeLength--; + } + } +#endif + + GenTree* const lengthForCheck = gtNewLclVarNode(lengthTemp); + var_types const lengthType = genActualType(lengthForCheck); + GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, lengthType); + GenTree* const lengthCompare = gtNewOperNode(GT_GT, TYP_INT, lengthForCheck, lengthLimit); + lengthCompare->gtFlags |= GTF_UNSIGNED; + + // Lazily allocate the per-frame running-total local, and insert an + // explicit zero-init store at the top of fgFirstBB. Independent of + // compInitMem and prolog zero-init policy. 
+ // + if (frameRunningTotalLclNum == BAD_VAR_NUM) + { + frameRunningTotalLclNum = lvaGrabTemp(false DEBUGARG("stack alloc frame running total")); + lvaTable[frameRunningTotalLclNum].lvType = TYP_I_IMPL; + lvaSetVarAddrExposed(frameRunningTotalLclNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); + + GenTree* const zeroInit = gtNewStoreLclVarNode(frameRunningTotalLclNum, gtNewIconNode(0, TYP_I_IMPL)); + Statement* const zeroInitStmt = fgNewStmtFromTree(zeroInit); + gtUpdateStmtSideEffects(zeroInitStmt); + fgInsertStmtAtBeg(fgFirstBB, zeroInitStmt); + + JITDUMP("Created stack alloc frame running total V%02u, zero-init at " FMT_BB "\n", frameRunningTotalLclNum, + fgFirstBB->bbNum); + } + + GenTree* const lengthCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, lengthCompare); + + Statement* const lengthCheckStmt = fgNewStmtFromTree(lengthCheck); + gtUpdateStmtSideEffects(lengthCheckStmt); + fgInsertStmtBefore(block, stmt, lengthCheckStmt); + + // Split block after the call, and insert blocks for the helper check, + // the localloc, and the heap alloc. + // + BasicBlock* const remainderBlock = fgSplitBlockAfterStatement(block, stmt); + BasicBlock* const helperCheckBlock = fgNewBBafter(BBJ_ALWAYS, block, /* extendRegion */ true); + BasicBlock* const locallocBlock = fgNewBBafter(BBJ_ALWAYS, helperCheckBlock, /* extendRegion */ true); + BasicBlock* const heapallocBlock = fgNewBBafter(BBJ_ALWAYS, locallocBlock, /* extendRegion */ true); + + // Ask the managed helper to decide if this request should use the stack. + // The helper accounts for request size, current stack usage, and the + // per-frame running total. It must run only after the length check has + // passed since it updates the per-frame running total on success. 
+ // + GenTreeCall* stackAllocHelperCall = + gtNewHelperCallNode(CORINFO_HELP_CAN_STACK_ALLOCATE, TYP_INT, gtNewLclVarNode(totalSizeTemp), + gtNewLclVarAddrNode(frameRunningTotalLclNum, TYP_I_IMPL)); + stackAllocHelperCall = fgMorphArgs(stackAllocHelperCall); + + GenTree* const helperFailed = gtNewOperNode(GT_EQ, TYP_INT, stackAllocHelperCall, gtNewIconNode(0, TYP_INT)); + GenTree* const helperCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, helperFailed); + Statement* const helperCheckStmt = fgNewStmtFromTree(helperCheck); + gtUpdateStmtSideEffects(helperCheckStmt); + fgInsertStmtAtBeg(helperCheckBlock, helperCheckStmt); + + // Wire up new flow.... assume (for now) localloc is more likely + // + FlowEdge* const blockRemainderEdge = fgGetPredForBlock(remainderBlock, block); + fgRemoveRefPred(blockRemainderEdge); + + FlowEdge* const helperCheckInEdge = fgAddRefPred(helperCheckBlock, block); + FlowEdge* const helperHeapInEdge = fgAddRefPred(heapallocBlock, helperCheckBlock); + FlowEdge* const locallocInEdge = fgAddRefPred(locallocBlock, helperCheckBlock); + FlowEdge* const locallocOutEdge = fgAddRefPred(remainderBlock, locallocBlock); + + helperCheckInEdge->setLikelihood(0.99); + helperCheckBlock->inheritWeightPercentage(block, 99); + helperCheckBlock->SetCond(helperHeapInEdge, locallocInEdge); + + helperHeapInEdge->setLikelihood(0.2); + locallocInEdge->setLikelihood(0.8); + locallocOutEdge->setLikelihood(1.0); + locallocBlock->SetTargetEdge(locallocOutEdge); + + FlowEdge* const heapallocInEdge = fgAddRefPred(heapallocBlock, block); + FlowEdge* const heapallocOutEdge = fgAddRefPred(remainderBlock, heapallocBlock); + + heapallocInEdge->setLikelihood(0.01); + heapallocOutEdge->setLikelihood(1.0); + heapallocBlock->SetTargetEdge(heapallocOutEdge); + + block->SetCond(heapallocInEdge, helperCheckInEdge); + + locallocBlock->inheritWeightPercentage(helperCheckBlock, 80); + heapallocBlock->inheritWeight(block); + heapallocBlock->bbWeight = heapallocBlock->computeIncomingWeight(); + 
+ // Now fill in the heapalloc block. + // + // Create a helper call just like call, but without the extra arguments + // + GenTreeCall* newCall = gtNewCallNode(CT_HELPER, call->gtCallMethHnd, call->TypeGet()); + + newCall->gtArgs.PushBack(this, NewCallArg::Primitive(gtNewLclVarNode(typeTemp))); + newCall->gtArgs.PushBack(this, NewCallArg::Primitive(gtNewLclVarNode(lengthTemp))); + newCall->gtFlags = call->gtFlags; +#if defined(FEATURE_READYTORUN) + newCall->setEntryPoint(call->gtEntryPoint); +#endif // FEATURE_READYTORUN + newCall = fgMorphArgs(newCall); + + assert(resultLclNum != BAD_VAR_NUM); + GenTree* const heapAllocStore = gtNewStoreLclVarNode(resultLclNum, newCall); + Statement* const heapAllocStmt = fgNewStmtFromTree(heapAllocStore); + + gtUpdateStmtSideEffects(heapAllocStmt); + fgInsertStmtAtBeg(heapallocBlock, heapAllocStmt); + + // Fill in the first part of the localloc block + // + fgUnlinkStmt(block, stmt); + fgInsertStmtAtBeg(locallocBlock, stmt); + + GenTree* const totalSizeForAlloc = gtNewLclVarNode(totalSizeTemp); + GenTree* const locallocNode = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, totalSizeForAlloc); + + // Allocation might fail. Codegen must zero the allocation + // + locallocNode->gtFlags |= (GTF_EXCEPT | GTF_LCLHEAP_MUSTINIT); + + GenTree* const locallocStore = gtNewStoreLclVarNode(locallocTemp, locallocNode); + Statement* const locallocStmt = fgNewStmtFromTree(locallocStore); + + gtUpdateStmtSideEffects(locallocStmt); + fgInsertStmtBefore(locallocBlock, stmt, locallocStmt); + + // Array address is the result of the localloc + // + stackLocalAddress = gtNewLclVarNode(locallocTemp); + compLocallocUsed = true; + +#ifndef TARGET_64BIT + if (isAlign8) + { + // For Align8, adjust address to be suitably aligned. 
+ // Addr = (Localloc + 4) & ~7; + // + GenTree* const alignSize = gtNewIconNode(4, TYP_I_IMPL); + GenTree* const biasedAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, stackLocalAddress, alignSize); + GenTree* const alignMaskInv = gtNewIconNode(-8, TYP_I_IMPL); + GenTree* const alignedAddress = gtNewOperNode(GT_AND, TYP_I_IMPL, biasedAddress, alignMaskInv); + GenTree* const alignedStore = gtNewStoreLclVarNode(locallocTemp, alignedAddress); + Statement* const alignedStmt = fgNewStmtFromTree(alignedStore); + + gtUpdateStmtSideEffects(alignedStmt); + fgInsertStmtBefore(locallocBlock, stmt, alignedStmt); + stackLocalAddress = gtNewLclVarNode(locallocTemp); + } +#endif + + // We now require a frame pointer + // + codeGen->setFramePointerRequired(true); + + // Update block so code below finishes initializing the localloc array + // in the localloc block. + // + block = locallocBlock; + } + else + { + assert(elemSizeArg == nullptr); + assert(stackLocalAddressArg != nullptr); + + // Array address is the block local we created earlier + // + stackLocalAddress = stackLocalAddressArg->GetNode(); + } // Initialize the array method table pointer. // - GenTree* const mt = call->gtArgs.GetArgByIndex(typeArgIndex)->GetNode(); - GenTree* const mtStore = gtNewStoreValueNode(TYP_I_IMPL, stackLocalAddress, mt); - Statement* const mtStmt = fgNewStmtFromTree(mtStore); + GenTree* const mt = call->gtArgs.GetArgByIndex(typeArgIndex)->GetNode(); + GenTree* const mtToStore = isLocAlloc ? gtNewLclVarNode(typeTemp) : mt; + GenTree* const mtStore = gtNewStoreValueNode(TYP_I_IMPL, stackLocalAddress, mtToStore); + Statement* const mtStmt = fgNewStmtFromTree(mtStore); fgInsertStmtBefore(block, stmt, mtStmt); // Initialize the array length. 
// - GenTree* const lengthArg = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode(); - GenTree* const lengthArgInt = fgOptimizeCast(gtNewCastNode(TYP_INT, lengthArg, false, TYP_INT)); - GenTree* const lengthAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(stackLocalAddress), - gtNewIconNode(OFFSETOF__CORINFO_Array__length, TYP_I_IMPL)); - GenTree* const lengthStore = gtNewStoreValueNode(TYP_INT, lengthAddress, lengthArgInt); - Statement* const lenStmt = fgNewStmtFromTree(lengthStore); + GenTree* const arrayLengthToStore = isLocAlloc ? gtNewLclVarNode(lengthTemp) : lengthArg; + GenTree* const lengthArgInt = fgOptimizeCast(gtNewCastNode(TYP_INT, arrayLengthToStore, false, TYP_INT)); + GenTree* const lengthAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(stackLocalAddress), + gtNewIconNode(OFFSETOF__CORINFO_Array__length, TYP_I_IMPL)); + GenTree* const lengthStore = gtNewStoreValueNode(TYP_INT, lengthAddress, lengthArgInt); + Statement* const lenStmt = fgNewStmtFromTree(lengthStore); fgInsertStmtBefore(block, stmt, lenStmt); - // Replace call with local address + // Replace call with local address. 
// - *callUse = gtCloneExpr(stackLocalAddress); + GenTree* replacement = gtCloneExpr(stackLocalAddress); + if (isLocAlloc) + { + replacement->ChangeType(TYP_BYREF); + } + + *callUse = replacement; DEBUG_DESTROY_NODE(call); fgMorphStmtBlockOps(block, stmt); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 8a87207fe96c8d..a492436ae22efc 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -718,6 +718,8 @@ RELEASE_CONFIG_INTEGER(JitObjectStackAllocationConditionalEscape, "JitObjectStac CONFIG_STRING(JitObjectStackAllocationConditionalEscapeRange, "JitObjectStackAllocationConditionalEscapeRange") RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationSize, "JitObjectStackAllocationSize", 528) +RELEASE_CONFIG_INTEGER(JitObjectStackAllocationLocalloc, "JitObjectStackAllocationLocalloc", 1) +RELEASE_CONFIG_INTEGER(JitObjectStackAllocationInLoop, "JitObjectStackAllocationInLoop", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationTrackFields, "JitObjectStackAllocationTrackFields", 1) CONFIG_STRING(JitObjectStackAllocationTrackFieldsRange, "JitObjectStackAllocationTrackFieldsRange") CONFIG_INTEGER(JitObjectStackAllocationDumpConnGraph, "JitObjectStackAllocationDumpConnGraph", 0) diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 7fc8865617b9dc..396624ed378946 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -89,6 +89,7 @@ JITMETADATAMETRIC(NewBoxedValueClassHelperCalls, int, 0) JITMETADATAMETRIC(StackAllocatedBoxedValueClasses, int, 0) JITMETADATAMETRIC(NewArrayHelperCalls, int, 0) JITMETADATAMETRIC(StackAllocatedArrays, int, 0) +JITMETADATAMETRIC(LocallocAllocatedArrays, int, 0) JITMETADATAMETRIC(LocalAssertionCount, int, 0) JITMETADATAMETRIC(LocalAssertionOverflow, int, 0) JITMETADATAMETRIC(MorphTrackedLocals, int, 0) diff --git 
a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index cc0925deaae544..6cd5518176d996 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -68,7 +68,7 @@ int LinearScan::BuildLclHeap(GenTree* tree) { internalIntCount = 0; } - else if (!m_compiler->info.compInitMem) + else if (!m_compiler->gtMustZeroLocalloc(tree)) { // No need to initialize allocated stack space. if (sizeVal < m_compiler->eeGetPageSize()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 3e24519a5566d0..d9a7bb69345046 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1220,7 +1220,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!m_compiler->info.compInitMem) + if (!m_compiler->gtMustZeroLocalloc(tree)) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index c178715baa1150..a7a320034d0387 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -452,7 +452,7 @@ int LinearScan::BuildNode(GenTree* tree) { // Need no internal registers } - else if (!m_compiler->info.compInitMem) + else if (!m_compiler->gtMustZeroLocalloc(tree)) { // No need to initialize allocated stack space. 
if (sizeVal < m_compiler->eeGetPageSize()) @@ -471,7 +471,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!m_compiler->info.compInitMem) + if (!m_compiler->gtMustZeroLocalloc(tree)) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index cb0aaf6011b55b..ac2057f7763d59 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -619,7 +619,7 @@ int LinearScan::BuildNode(GenTree* tree) { // Need no internal registers } - else if (!m_compiler->info.compInitMem) + else if (!m_compiler->gtMustZeroLocalloc(tree)) { // No need to initialize allocated stack space. if (sizeVal < m_compiler->eeGetPageSize()) @@ -640,7 +640,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!m_compiler->info.compInitMem) + if (!m_compiler->gtMustZeroLocalloc(tree)) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 05781578c7c608..e76f68d429a34a 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1821,14 +1821,15 @@ int LinearScan::BuildLclHeap(GenTree* tree) size_t sizeVal = AlignUp((size_t)size->AsIntCon()->IconValue(), STACK_ALIGN); // Explicitly zeroed LCLHEAP also needs a regCnt in case of x86 or large page - if ((TARGET_POINTER_SIZE == 4) || (sizeVal >= m_compiler->eeGetPageSize())) + if ((TARGET_POINTER_SIZE == 4) || (sizeVal >= m_compiler->eeGetPageSize()) || + (tree->gtFlags & GTF_LCLHEAP_MUSTINIT)) { buildInternalIntRegisterDefForNode(tree); } } else { - if (!m_compiler->info.compInitMem) + if (!m_compiler->gtMustZeroLocalloc(tree)) { // For regCnt buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 7c99a66f80bc9f..3bd2a463671a56 100644 --- a/src/coreclr/jit/morph.cpp +++ 
b/src/coreclr/jit/morph.cpp @@ -625,6 +625,8 @@ const char* getWellKnownArgName(WellKnownArg arg) return "X86TailCallSpecialArg"; case WellKnownArg::StackArrayLocal: return "StackArrayLocal"; + case WellKnownArg::StackArrayElemSize: + return "StackArrayElemSize"; case WellKnownArg::RuntimeMethodHandle: return "RuntimeMethodHandle"; case WellKnownArg::AsyncExecutionContext: diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 9f9e7baeaa6cd9..d2d11acd0f97b1 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -47,6 +47,8 @@ ObjectAllocator::ObjectAllocator(Compiler* comp) , m_ConnGraphAdjacencyMatrix(nullptr) , m_StackAllocMaxSize(0) , m_stackAllocationCount(0) + , m_UseLocalloc(false) + , m_UseLocallocInLoop(false) , m_EnumeratorLocalToPseudoIndexMap(comp->getAllocator(CMK_ObjectAllocator)) , m_CloneMap(comp->getAllocator(CMK_ObjectAllocator)) , m_nextLocalIndex(0) @@ -65,6 +67,11 @@ ObjectAllocator::ObjectAllocator(Compiler* comp) m_ConnGraphAdjacencyMatrix = nullptr; m_StackAllocMaxSize = (unsigned)JitConfig.JitObjectStackAllocationSize(); m_trackFields = JitConfig.JitObjectStackAllocationTrackFields() > 0; + + // OSR and AOT do not support the conditional localloc expansion. 
+ // + m_UseLocalloc = JitConfig.JitObjectStackAllocationLocalloc() && !comp->opts.IsOSR() && !comp->IsAot(); + m_UseLocallocInLoop = m_UseLocalloc && JitConfig.JitObjectStackAllocationInLoop(); } //------------------------------------------------------------------------ @@ -1139,11 +1146,12 @@ void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits) // lclNum - Local variable number // clsHnd - Class/struct handle of the variable class // allocType - Type of allocation (newobj or newarr) -// length - Length of the array (for newarr) +// length - Length of the array (for newarr); 1 for runtime-determined size // blockSize - [out, optional] exact size of the object // reason - [out, required] if result is false, reason why // preliminaryCheck - if true, allow checking before analysis is done // (for things that inherently disqualify the local) +// lengthKnown - true if length is known at compile time (default true) // // Return Value: // Returns true iff local variable can be allocated on the stack. @@ -1154,7 +1162,8 @@ bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, ssize_t length, unsigned int* blockSize, const char** reason, - bool preliminaryCheck) + bool preliminaryCheck, + bool lengthKnown) { assert(preliminaryCheck || m_AnalysisDone); @@ -1199,6 +1208,15 @@ bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, ClassLayout* const layout = m_compiler->typGetArrayLayout(clsHnd, (unsigned)length); classSize = layout->GetSize(); + + if (!lengthKnown && layout->HasGCPtr()) + { + // We can't represent GC info for runtime-sized stack arrays yet. 
+ // + assert(length == 1); + *reason = "[unknown length, gc elements]"; + return false; + } } else if (allocType == OAT_NEWOBJ) { @@ -1296,9 +1314,13 @@ ObjectAllocator::ObjectAllocationType ObjectAllocator::AllocationKind(GenTree* t case CORINFO_HELP_NEWARR_1_DIRECT: case CORINFO_HELP_NEWARR_1_ALIGN8: { - if ((call->gtArgs.CountUserArgs() == 2) && call->gtArgs.GetUserArgByIndex(1)->GetNode()->IsCnsIntOrI()) + if (call->gtArgs.CountUserArgs() == 2) { - allocType = OAT_NEWARR; + GenTree* const lenArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + if (lenArg->IsCnsIntOrI() || m_UseLocalloc) + { + allocType = OAT_NEWARR; + } } break; } @@ -1331,9 +1353,8 @@ bool ObjectAllocator::MorphAllocObjNodes() for (BasicBlock* const block : m_compiler->Blocks()) { - const bool basicBlockHasNewObj = block->HasFlag(BBF_HAS_NEWOBJ); - const bool basicBlockHasNewArr = block->HasFlag(BBF_HAS_NEWARR); - const bool basicBlockHasBackwardJump = block->HasFlag(BBF_BACKWARD_JUMP); + const bool basicBlockHasNewObj = block->HasFlag(BBF_HAS_NEWOBJ); + const bool basicBlockHasNewArr = block->HasFlag(BBF_HAS_NEWARR); if (!basicBlockHasNewObj && !basicBlockHasNewArr) { @@ -1388,8 +1409,15 @@ void ObjectAllocator::MorphAllocObjNode(AllocationCandidate& candidate) // We keep the set of possibly-stack-pointing pointers as a superset of the set of // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both // sets. - MarkLclVarAsDefinitelyStackPointing(lclNum); MarkLclVarAsPossiblyStackPointing(lclNum); + if (candidate.m_definitelyStackPointing) + { + MarkLclVarAsDefinitelyStackPointing(lclNum); + } + else if (IsTrackedLocal(lclNum)) + { + AddConnGraphEdgeIndex(LocalToIndex(lclNum), m_unknownSourceIndex); + } // If this was conditionally escaping enumerator, establish a connection between this local // and the enumeratorLocal we already allocated. 
This is needed because we do early rewriting @@ -1460,8 +1488,11 @@ bool ObjectAllocator::MorphAllocObjNodeHelper(AllocationCandidate& candidate) } // Don't attempt to do stack allocations inside basic blocks that may be in a loop. + // Exception: newarrs (constant- or runtime-sized) may go via localloc when m_UseLocallocInLoop is set; + // those are filtered later in MorphAllocObjNodeHelperArr. // - if (candidate.m_block->HasFlag(BBF_BACKWARD_JUMP)) + if (candidate.m_block->HasFlag(BBF_BACKWARD_JUMP) && + !((candidate.m_allocType == OAT_NEWARR) && m_UseLocallocInLoop)) { candidate.m_onHeapReason = "[alloc in loop]"; return false; @@ -1602,8 +1633,36 @@ bool ObjectAllocator::MorphAllocObjNodeHelperArr(AllocationCandidate& candidate) if (!len->IsCnsIntOrI()) { - candidate.m_onHeapReason = "[non-constant array size]"; - return false; + if (!m_UseLocalloc) + { + candidate.m_onHeapReason = "[non-constant array size]"; + return false; + } + + if (candidate.m_block->hasHndIndex()) + { + candidate.m_onHeapReason = "[non-constant array size, in handler]"; + return false; + } + + // Runtime-sized array: try to allocate via localloc. + // Pass length=1 with lengthKnown=false so layout-based checks (e.g. GC pointer guard) run. + // + if (!CanAllocateLclVarOnStack(candidate.m_lclNum, clsHnd, candidate.m_allocType, /* length */ 1, &blockSize, + &candidate.m_onHeapReason, /* preliminaryCheck */ false, + /* lengthKnown */ false)) + { + // reason set by the call + return false; + } + + JITDUMP("Allocating V%02u on the stack [via localloc]\n", candidate.m_lclNum); + MorphNewArrNodeIntoLocAlloc(data->AsCall(), clsHnd, len, candidate.m_block, candidate.m_statement); + m_compiler->Metrics.LocallocAllocatedArrays++; + // Helper expansion may take the heap fallback at runtime, so the local is only + // possibly (not definitely) stack-pointing and must remain GC-reportable. 
+ candidate.m_definitelyStackPointing = false; + return true; } if (!CanAllocateLclVarOnStack(candidate.m_lclNum, clsHnd, candidate.m_allocType, len->AsIntCon()->IconValue(), @@ -1613,6 +1672,39 @@ bool ObjectAllocator::MorphAllocObjNodeHelperArr(AllocationCandidate& candidate) return false; } + // If a constant-sized newarr ended up here despite being in a loop, we must dispatch + // through localloc (the loop-check exemption only applies to OAT_NEWARR with m_UseLocallocInLoop). + // + if (candidate.m_block->HasFlag(BBF_BACKWARD_JUMP)) + { + assert(m_UseLocallocInLoop); + + if (candidate.m_block->hasHndIndex()) + { + candidate.m_onHeapReason = "[alloc in loop, in handler]"; + return false; + } + + // Conditional localloc does not have a fixed layout in the frame, so we + // cannot report GC element slots for it. + // + if (!CanAllocateLclVarOnStack(candidate.m_lclNum, clsHnd, candidate.m_allocType, /* length */ 1, &blockSize, + &candidate.m_onHeapReason, /* preliminaryCheck */ false, + /* lengthKnown */ false)) + { + // reason set by the call + return false; + } + + JITDUMP("Allocating V%02u on the stack [via localloc, in loop]\n", candidate.m_lclNum); + MorphNewArrNodeIntoLocAlloc(data->AsCall(), clsHnd, len, candidate.m_block, candidate.m_statement); + m_compiler->Metrics.LocallocAllocatedArrays++; + // Helper expansion may take the heap fallback at runtime, so the local is only + // possibly (not definitely) stack-pointing and must remain GC-reportable. 
+ candidate.m_definitelyStackPointing = false; + return true; + } + JITDUMP("Allocating V%02u on the stack\n", candidate.m_lclNum); const unsigned int stackLclNum = MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd, (unsigned int)len->AsIntCon()->IconValue(), blockSize, @@ -1836,6 +1928,63 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall* return lclNum; } +//------------------------------------------------------------------------ +// MorphNewArrNodeIntoLocAlloc: Morph a newarray helper call node into a local frame allocation. +// +// Arguments: +// newArr - GT_CALL for the newarr helper; it is annotated and retyped in place. +// clsHnd - class representing the type of the array +// length - operand for length of the array (currently unused) +// block - a basic block where newArr is (currently unused) +// stmt - a statement where newArr is (currently unused) +// +void ObjectAllocator::MorphNewArrNodeIntoLocAlloc( + GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* length, BasicBlock* block, Statement* stmt) +{ + assert(newArr != nullptr); + assert(m_AnalysisDone); + assert(clsHnd != NO_CLASS_HANDLE); + assert(newArr->IsHelperCall()); + assert(newArr->GetHelperNum() != CORINFO_HELP_NEWARR_1_MAYBEFROZEN); + + // Get element size + // + CORINFO_CLASS_HANDLE elemClsHnd = NO_CLASS_HANDLE; + CorInfoType corType = m_compiler->info.compCompHnd->getChildType(clsHnd, &elemClsHnd); + var_types type = JITtype2varType(corType); + ClassLayout* elemLayout = type == TYP_STRUCT ? m_compiler->typGetObjLayout(elemClsHnd) : nullptr; + + const unsigned elemSize = elemLayout != nullptr ? 
elemLayout->GetSize() : genTypeSize(type); + + // Mark the newarr call as being "on stack", and add the element size + // operand for the stack local as an argument + // + GenTree* const elemSizeNode = m_compiler->gtNewIconNode(elemSize, TYP_I_IMPL); + newArr->gtArgs.PushBack(m_compiler, + NewCallArg::Primitive(elemSizeNode).WellKnown(WellKnownArg::StackArrayElemSize)); + newArr->gtCallMoreFlags |= GTF_CALL_M_STACK_ARRAY; + + // Retype the call result as a byref (we may decide to heap allocate at runtime). + // + newArr->ChangeType(TYP_BYREF); + newArr->gtReturnType = TYP_BYREF; + + // Note that we have stack allocated arrays in this method + // + m_compiler->setMethodHasStackAllocatedArray(); + + // Notify the compiler; this disables fast tail calls (for now) + // + m_compiler->compLocallocUsed = true; + +#ifdef UNIX_AMD64_ABI + // Ensure we don't end up with misaligned frames, + // if we manage to dead code this newarr. + // + m_compiler->opts.compNeedToAlignFrame = true; +#endif +} + //------------------------------------------------------------------------ // MorphAllocObjNodeIntoStackAlloc: Morph a GT_ALLOCOBJ node into stack // allocation. @@ -2510,11 +2659,14 @@ void ObjectAllocator::UpdateAncestorTypes( var_types parentType = parent->TypeGet(); assert(parentType != TYP_REF); - // New type can be TYP_I_IMPL, TYP_BYREF. - // But TYP_BYREF only if parent is also - // if (parentType != newType) { + if (newType == TYP_BYREF) + { + // The subtraction produces a native int; keep the parent type. + break; + } + // We must be retyping TYP_BYREF to TYP_I_IMPL. // assert(newType == TYP_I_IMPL); @@ -2689,6 +2841,7 @@ void ObjectAllocator::UpdateAncestorTypes( } case GT_CALL: + // Watch for helper calls that have retyped operands...? 
break; default: @@ -2989,7 +3142,6 @@ void ObjectAllocator::RewriteUses() if (!layout->HasGCPtr()) { - assert(newType == TYP_I_IMPL); JITDUMP("V%02u not GC\n", lclNum); lclVarDsc->lvTracked = 0; continue; diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h index 7af1b11788c980..10c319fa68a994 100644 --- a/src/coreclr/jit/objectalloc.h +++ b/src/coreclr/jit/objectalloc.h @@ -149,6 +149,7 @@ class ObjectAllocator final : public Phase , m_allocType(allocType) , m_onHeapReason(nullptr) , m_bashCall(false) + , m_definitelyStackPointing(true) { } @@ -159,6 +160,12 @@ class ObjectAllocator final : public Phase ObjectAllocationType const m_allocType; const char* m_onHeapReason; bool m_bashCall; + // True if a successful stack-allocation of this candidate yields a local that + // definitely points at stack memory. False when the morph leaves a runtime + // heap fallback in place (e.g. the localloc/heapalloc split for runtime-sized + // arrays); in that case the local is only possibly stack-pointing and must + // remain GC-reportable. 
+ bool m_definitelyStackPointing; }; typedef SmallHashTable LocalToLocalMap; @@ -184,6 +191,8 @@ class ObjectAllocator final : public Phase BitSetShortLongRep* m_ConnGraphAdjacencyMatrix; unsigned int m_StackAllocMaxSize; unsigned m_stackAllocationCount; + bool m_UseLocalloc; + bool m_UseLocallocInLoop; // Info for conditionally-escaping locals LocalToLocalMap m_EnumeratorLocalToPseudoIndexMap; @@ -213,7 +222,8 @@ class ObjectAllocator final : public Phase ssize_t length, unsigned int* blockSize, const char** reason, - bool preliminaryCheck = false); + bool preliminaryCheck = false, + bool lengthKnown = true); static GenTree* IsGuard(BasicBlock* block, GuardInfo* info); @@ -265,6 +275,8 @@ class ObjectAllocator final : public Phase unsigned int blockSize, BasicBlock* block, Statement* stmt); + void MorphNewArrNodeIntoLocAlloc( + GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* length, BasicBlock* block, Statement* stmt); struct BuildConnGraphVisitorCallbackData; void AnalyzeParentStack(ArrayStack* parentStack, unsigned int lclNum, BasicBlock* block); void UpdateAncestorTypes( diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 86300e47383b0f..06949e7955ae62 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1564,6 +1564,11 @@ void HelperCallProperties::init() exceptions = ExceptionSetFlags::None; break; + case CORINFO_HELP_CAN_STACK_ALLOCATE: + exceptions = ExceptionSetFlags::None; + isNoEscape = true; + break; + // type casting helpers that throw case CORINFO_HELP_CHKCASTINTERFACE: case CORINFO_HELP_CHKCASTARRAY: diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 68f7b52f6ec048..4f809a08de8541 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -183,6 +183,7 @@ which is the right helper to use to allocate an object of a given type. 
*/ CORINFO_HELP_TAILCALL, // Perform a tail call CORINFO_HELP_GETCURRENTMANAGEDTHREADID, + CORINFO_HELP_CAN_STACK_ALLOCATE, // decide whether a conditional localloc is safe CORINFO_HELP_INIT_PINVOKE_FRAME, // initialize an inlined PInvoke Frame for the JIT-compiler diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index fa6f4000edaa7e..7e7a7ca4ff203f 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -692,6 +692,7 @@ DEFINE_METHOD(RUNTIME_HELPERS, ENUM_EQUALS, EnumEquals, NoSig) DEFINE_METHOD(RUNTIME_HELPERS, ENUM_COMPARE_TO, EnumCompareTo, NoSig) DEFINE_METHOD(RUNTIME_HELPERS, ALLOC_TAILCALL_ARG_BUFFER, AllocTailCallArgBuffer, NoSig) DEFINE_METHOD(RUNTIME_HELPERS, DISPATCH_TAILCALLS, DispatchTailCalls, NoSig) +DEFINE_METHOD(RUNTIME_HELPERS, CAN_STACK_ALLOCATE, CanStackAllocate, SM_UIntPtr_PtrUIntPtr_RetBool) #ifdef FEATURE_IJW DEFINE_METHOD(RUNTIME_HELPERS, COPY_CONSTRUCT, CopyConstruct, NoSig) #endif // FEATURE_IJW diff --git a/src/coreclr/vm/ecalllist.h b/src/coreclr/vm/ecalllist.h index d0ac7d28c6f747..de565ae24ca0c8 100644 --- a/src/coreclr/vm/ecalllist.h +++ b/src/coreclr/vm/ecalllist.h @@ -336,6 +336,7 @@ FCFuncStart(gRuntimeHelpers) FCFuncElement("TryGetHashCode", ObjectNative::TryGetHashCode) FCFuncElement("ContentEquals", ObjectNative::ContentEquals) FCFuncElement("TryEnsureSufficientExecutionStack", ReflectionInvocation::TryEnsureSufficientExecutionStack) + FCFuncElement("GetStackBounds", ReflectionInvocation::GetStackBounds) FCFuncElement("GetTailCallArgBuffer", TailCallHelp::GetTailCallArgBuffer) FCFuncElement("GetTailCallInfo", TailCallHelp::GetTailCallInfo) FCFuncEnd() diff --git a/src/coreclr/vm/metasig.h b/src/coreclr/vm/metasig.h index fb3801964ee0cf..312acc03a0653d 100644 --- a/src/coreclr/vm/metasig.h +++ b/src/coreclr/vm/metasig.h @@ -236,6 +236,7 @@ DEFINE_METASIG(SM(Long_Long_RetLong, l l, l)) DEFINE_METASIG(SM(ULong_ULong_RetULong, L L, L)) DEFINE_METASIG(SM(RefLong_Long_Long_RetLong, r(l) l l, l)) 
DEFINE_METASIG(SM(RefInt_Int_Int_RetInt, r(i) i i, i)) +DEFINE_METASIG(SM(UIntPtr_PtrUIntPtr_RetBool, U P(U), F)) DEFINE_METASIG(SM(ObjIntPtr_RetVoid, j I, v)) DEFINE_METASIG(SM(RefByte_RefByte_UInt_RetVoid, r(b) r(b) K, v)) diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp index ea47eb65a156ce..c1b7e4a309ac8b 100644 --- a/src/coreclr/vm/reflectioninvocation.cpp +++ b/src/coreclr/vm/reflectioninvocation.cpp @@ -1439,6 +1439,16 @@ FCIMPL0(FC_BOOL_RET, ReflectionInvocation::TryEnsureSufficientExecutionStack) } FCIMPLEND +FCIMPL2(void, ReflectionInvocation::GetStackBounds, UINT_PTR* stackBase, UINT_PTR* stackLimit) +{ + FCALL_CONTRACT; + + Thread* pThread = GetThread(); + *stackBase = reinterpret_cast(pThread->GetCachedStackBase()); + *stackLimit = reinterpret_cast(pThread->GetCachedStackLimit()); +} +FCIMPLEND + #ifdef FEATURE_COMINTEROP extern "C" void QCALLTYPE ReflectionInvocation_InvokeDispMethod( QCall::ObjectHandleOnStack type, diff --git a/src/coreclr/vm/reflectioninvocation.h b/src/coreclr/vm/reflectioninvocation.h index cf75f5391f6d57..d9026f60cb9b23 100644 --- a/src/coreclr/vm/reflectioninvocation.h +++ b/src/coreclr/vm/reflectioninvocation.h @@ -46,6 +46,7 @@ class ReflectionInvocation { public: FCDECL0(static FC_BOOL_RET, TryEnsureSufficientExecutionStack); + FCDECL2(static void, GetStackBounds, UINT_PTR* stackBase, UINT_PTR* stackLimit); }; extern "C" void QCALLTYPE ReflectionInvocation_CompileMethod(MethodDesc * pMD); diff --git a/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs new file mode 100644 index 00000000000000..e9e189778d105b --- /dev/null +++ b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs @@ -0,0 +1,226 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Runtime.CompilerServices; +using TestLibrary; +using Xunit; + +enum AllocationKind +{ + Heap, + Stack, + Undefined +} + +delegate int Test(); + +public class LocallocStackAlloc +{ + static bool GCStressEnabled() + { + return Environment.GetEnvironmentVariable("DOTNET_GCStress") != null; + } + + static AllocationKind StackAllocation() + { + AllocationKind expectedAllocationKind = AllocationKind.Stack; + if (!OperatingSystem.IsWindows() || GCStressEnabled()) + { + Console.WriteLine("Allocation kind is not predictable"); + expectedAllocationKind = AllocationKind.Undefined; + } + return expectedAllocationKind; + } + + static AllocationKind HeapAllocation() + { + AllocationKind expectedAllocationKind = AllocationKind.Heap; + if (!OperatingSystem.IsWindows() || GCStressEnabled()) + { + Console.WriteLine("Allocation kind is not predictable"); + expectedAllocationKind = AllocationKind.Undefined; + } + return expectedAllocationKind; + } + + static int CallTestAndVerifyAllocation(Test test, int expectedResult, AllocationKind expectedAllocationsKind, bool throws = false) + { + string methodName = test.Method.Name; + try + { + long allocatedBytesBefore = GC.GetAllocatedBytesForCurrentThread(); + int testResult = test(); + long allocatedBytesAfter = GC.GetAllocatedBytesForCurrentThread(); + + if (throws) + { + Console.WriteLine($"FAILURE ({methodName}): expected exception, got {testResult}"); + return -1; + } + + if (testResult != expectedResult) + { + Console.WriteLine($"FAILURE ({methodName}): expected {expectedResult}, got {testResult}"); + return -1; + } + + if ((expectedAllocationsKind == AllocationKind.Stack) && (allocatedBytesBefore != allocatedBytesAfter)) + { + Console.WriteLine($"FAILURE ({methodName}): unexpected allocation of {allocatedBytesAfter - allocatedBytesBefore} bytes"); + return -1; + } + + if ((expectedAllocationsKind == AllocationKind.Heap) && (allocatedBytesBefore == allocatedBytesAfter)) + { + Console.WriteLine($"FAILURE 
({methodName}): unexpected stack allocation"); + return -1; + } + + Console.WriteLine($"SUCCESS ({methodName})"); + return 100; + } + catch (Exception e) + { + if (throws) + { + Console.WriteLine($"SUCCESS ({methodName}) caught {e.GetType().Name}"); + return 100; + } + Console.WriteLine($"FAILURE ({methodName}): unexpected {e.GetType().Name}: {e.Message}"); + return -1; + } + } + + // Keep JIT from constant-folding the length. + [MethodImpl(MethodImplOptions.NoInlining)] + static int OpaqueLength(int n) => n; + + // Variable-length stack-allocated int[] within the localloc threshold. + // Sums the elements after writing them. + [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthSmall() + { + int n = OpaqueLength(8); + int[] array = new int[n]; + int sum = 0; + for (int i = 0; i < array.Length; i++) + { + array[i] = i + 1; + } + for (int i = 0; i < array.Length; i++) + { + sum += array[i]; + } + return sum + array.Length; + } + + // Variable-length newarr that exceeds the stack-alloc threshold; should be + // routed to the heap helper at runtime instead of corrupting the stack. + [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthLarge() + { + int n = OpaqueLength(100_000); + int[] array = new int[n]; + int sum = 0; + for (int i = 0; i < array.Length; i++) + { + array[i] = 1; + } + for (int i = 0; i < array.Length; i++) + { + sum += array[i]; + } + return sum; + } + + // Negative length must throw OverflowException via the heap helper + // even when the localloc dispatch path is selected. + [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthNegative() + { + int n = OpaqueLength(-1); + int[] array = new int[n]; + return array.Length; + } + + // int.MinValue length must also throw OverflowException; this is the case + // where signed totalSize wraps to a small value if not guarded properly. 
+ [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthIntMin() + { + int n = OpaqueLength(int.MinValue); + int[] array = new int[n]; + return array.Length; + } + + // Length near INT32_MAX with large element size: elemSize * length overflows. + // Helper should raise OutOfMemoryException; no stack corruption. + [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthHuge() + { + int n = OpaqueLength(int.MaxValue); + long[] array = new long[n]; + return array.Length; + } + + // Repeatedly allocate a small variable-length array within a single + // method invocation. The per-frame budget caps total localloc bytes, so + // after enough iterations the remaining allocations must fall back to + // the heap rather than growing the frame without bound. + [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthFrameBudget() + { + int sum = 0; + for (int iter = 0; iter < 200; iter++) + { + int n = OpaqueLength(64); + int[] array = new int[n]; + for (int i = 0; i < array.Length; i++) + { + array[i] = i + 1; + } + for (int i = 0; i < array.Length; i++) + { + sum += array[i]; + } + } + return sum; + } + + [ActiveIssue("needs triage", TestRuntimes.Mono)] + [Fact] + public static int TestSmall() + { + VariableLengthSmall(); + return CallTestAndVerifyAllocation(VariableLengthSmall, 8 + (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8), StackAllocation()); + } + + [ActiveIssue("needs triage", TestRuntimes.Mono)] + [Fact] + public static int TestLarge() + { + VariableLengthLarge(); + return CallTestAndVerifyAllocation(VariableLengthLarge, 100_000, HeapAllocation()); + } + + [ActiveIssue("needs triage", TestRuntimes.Mono)] + [Fact] + public static int TestNegative() => CallTestAndVerifyAllocation(VariableLengthNegative, 0, AllocationKind.Undefined, throws: true); + + [ActiveIssue("needs triage", TestRuntimes.Mono)] + [Fact] + public static int TestIntMin() => CallTestAndVerifyAllocation(VariableLengthIntMin, 0, AllocationKind.Undefined, throws: true); + + 
[ActiveIssue("needs triage", TestRuntimes.Mono)] + [Fact] + public static int TestHuge() => CallTestAndVerifyAllocation(VariableLengthHuge, 0, AllocationKind.Undefined, throws: true); + + [ActiveIssue("needs triage", TestRuntimes.Mono)] + [Fact] + public static int TestFrameBudget() + { + VariableLengthFrameBudget(); + return CallTestAndVerifyAllocation(VariableLengthFrameBudget, 200 * ((64 * 65) / 2), HeapAllocation()); + } +} diff --git a/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.csproj b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.csproj new file mode 100644 index 00000000000000..993c32962762b9 --- /dev/null +++ b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.csproj @@ -0,0 +1,15 @@ + + + + true + None + True + true + + + + + + + +