Skip to content

Commit 5ec0c7a

Browse files
authored
Remove extra UnmanagedCallersOnly overhead on x86 (dotnet#46238)
* Implement emitting an unmanaged calling convention entry point with the correct argument order and register usage on x86.
* Move Unix x86 to the UnmanagedCallersOnly plan now that we don't need to do argument shuffling.
* Add SEH hookup and profiler/debugger hooks to Reverse P/Invoke entry helper to match custom x86 thunk. Fixes dotnet#46177
* Remove Windows x86 assembly stub for individual reverse p/invokes. Move Windows x86 unmanaged callers only to not have extra overhead and put reverse P/Invoke stubs for Windows x86 on the UnmanagedCallersOnly plan.
* Further cleanup
* Remove extraneous UnmanagedCallersOnly block now that x86 UnmanagedCallersOnly has been simplified.
* Undo ArgOrder size specifier since it isn't needed and it doesn't work.
* Fix copy constructor reverse marshalling. Now that we don't have the emitted unmanaged thunk stub, we need to handle the x86 differences for copy-constructed parameters in the IL stub.
* Fix version guid syntax.
* Remove FastNExportHandler.
* Revert "Remove FastNExportHandler." This reverts commit 423f70e.
* Fix setting up entry frame for new thread.
* Allow the NExportSEH record to live below ESP so we don't need to create a new stack frame.
* Fix formatting.
* Assign an offset for the return buffer on x86 since it might come in on the stack.
* Make sure we use the TC block we just put in on x86 as well.
* Shrink the ReversePInvokeFrame on non-x86 back to master's size.
* Fix arch-specific R2R constant.
* Pass the return address of the ReversePInvokeEnter helper to TraceCall instead of the entry point and call TraceCall from all JIT_ReversePInvokeEnter* helpers.
* Fix ILVerification and ILVerify
* fix R2R constants for crossgen1
* Don't assert ReversePInvokeFrame size for cross-bitness scenarios.
1 parent 5aef85a commit 5ec0c7a

40 files changed

Lines changed: 326 additions & 1491 deletions

src/coreclr/inc/corinfo.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -700,12 +700,16 @@ enum class CorInfoCallConvExtension
700700
// New calling conventions supported with the extensible calling convention encoding go here.
701701
};
702702

703-
#ifdef UNIX_X86_ABI
703+
#ifdef TARGET_X86
704704
inline bool IsCallerPop(CorInfoCallConvExtension callConv)
705705
{
706+
#ifdef UNIX_X86_ABI
706707
return callConv == CorInfoCallConvExtension::Managed || callConv == CorInfoCallConvExtension::C;
707-
}
708+
#else
709+
return callConv == CorInfoCallConvExtension::C;
708710
#endif // UNIX_X86_ABI
711+
}
712+
#endif
709713

710714
// Determines whether or not this calling convention is an instance method calling convention.
711715
inline bool callConvIsInstanceMethodCallConv(CorInfoCallConvExtension callConv)

src/coreclr/inc/jiteeversionguid.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// be changed. This is the identifier verified by ICorJitCompiler::getVersionIdentifier().
1313
//
1414
// You can use "uuidgen.exe -s" to generate this value.
15-
//
15+
//
1616
// Note that this file is parsed by some tools, namely superpmi.py, so make sure the first line is exactly
1717
// of the form:
1818
//
@@ -30,12 +30,13 @@
3030
// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
3131
//
3232
//////////////////////////////////////////////////////////////////////////////////////////////////////////
33+
//
3334

34-
constexpr GUID JITEEVersionIdentifier = { /* f556df6c-b9c7-479c-b895-8e1f1959fe59 */
35-
0xf556df6c,
36-
0xb9c7,
37-
0x479c,
38-
{0xb8, 0x95, 0x8e, 0x1f, 0x19, 0x59, 0xfe, 0x59}
35+
constexpr GUID JITEEVersionIdentifier = { /* 768493d2-21cb-41e6-b06d-e62131fd0fc2 */
36+
0x768493d2,
37+
0x21cb,
38+
0x41e6,
39+
{0xb0, 0x6d, 0xe6, 0x21, 0x31, 0xfd, 0x0f, 0xc2}
3940
};
4041

4142
//////////////////////////////////////////////////////////////////////////////////////////////////////////

src/coreclr/inc/readytorun.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,11 @@ struct READYTORUN_EXCEPTION_CLAUSE
397397
enum ReadyToRunRuntimeConstants : DWORD
398398
{
399399
READYTORUN_PInvokeTransitionFrameSizeInPointerUnits = 11,
400-
READYTORUN_ReversePInvokeTransitionFrameSizeInPointerUnits = 2
400+
#ifdef TARGET_X86
401+
READYTORUN_ReversePInvokeTransitionFrameSizeInPointerUnits = 5,
402+
#else
403+
READYTORUN_ReversePInvokeTransitionFrameSizeInPointerUnits = 2,
404+
#endif
401405
};
402406

403407
enum ReadyToRunHFAElemType : DWORD

src/coreclr/jit/codegencommon.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8896,10 +8896,8 @@ void CodeGen::genFnEpilog(BasicBlock* block)
88968896
if (compiler->info.compIsVarArgs)
88978897
fCalleePop = false;
88988898

8899-
#ifdef UNIX_X86_ABI
89008899
if (IsCallerPop(compiler->info.compCallConv))
89018900
fCalleePop = false;
8902-
#endif // UNIX_X86_ABI
89038901

89048902
if (fCalleePop)
89058903
{

src/coreclr/jit/compiler.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6184,10 +6184,12 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
61846184
{
61856185
bool unused;
61866186
info.compCallConv = info.compCompHnd->getUnmanagedCallConv(methodInfo->ftn, nullptr, &unused);
6187+
info.compArgOrder = Target::g_tgtUnmanagedArgOrder;
61876188
}
61886189
else
61896190
{
61906191
info.compCallConv = CorInfoCallConvExtension::Managed;
6192+
info.compArgOrder = Target::g_tgtArgOrder;
61916193
}
61926194

61936195
info.compIsVarArgs = false;

src/coreclr/jit/compiler.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9379,6 +9379,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
93799379
// current number of EH clauses (after additions like synchronized
93809380
// methods and funclets, and removals like unreachable code deletion).
93819381

9382+
Target::ArgOrder compArgOrder;
9383+
93829384
bool compMatchedVM; // true if the VM is "matched": either the JIT is a cross-compiler
93839385
// and the VM expects that, or the JIT is a "self-host" compiler
93849386
// (e.g., x86 hosted targeting x86) and the VM expects that.
@@ -9458,6 +9460,14 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
94589460
return (info.compRetBuffArg != BAD_VAR_NUM);
94599461
}
94609462
#endif // TARGET_WINDOWS && TARGET_ARM64
9463+
// 4. x86 unmanaged calling conventions require the address of RetBuff to be returned in eax.
9464+
CLANG_FORMAT_COMMENT_ANCHOR;
9465+
#if defined(TARGET_X86)
9466+
if (info.compCallConv != CorInfoCallConvExtension::Managed)
9467+
{
9468+
return (info.compRetBuffArg != BAD_VAR_NUM);
9469+
}
9470+
#endif
94619471

94629472
return false;
94639473
#endif // TARGET_AMD64

src/coreclr/jit/flowgraph.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8683,13 +8683,29 @@ void Compiler::fgAddReversePInvokeEnterExit()
86838683
varDsc->lvType = TYP_BLK;
86848684
varDsc->lvExactSize = eeGetEEInfo()->sizeOfReversePInvokeFrame;
86858685

8686-
GenTree* tree;
8687-
86888686
// Add enter pinvoke exit callout at the start of prolog
86898687

8690-
tree = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaReversePInvokeFrameVar, TYP_BLK));
8688+
GenTree* pInvokeFrameVar = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaReversePInvokeFrameVar, TYP_BLK));
8689+
8690+
GenTree* stubArgument;
8691+
8692+
if (info.compPublishStubParam)
8693+
{
8694+
// If we have a secret param for a Reverse P/Invoke, that means that we are in an IL stub.
8695+
// In this case, the method handle we pass down to the Reverse P/Invoke helper should be
8696+
// the target method, which is passed in the secret parameter.
8697+
stubArgument = gtNewLclvNode(lvaStubArgumentVar, TYP_I_IMPL);
8698+
}
8699+
else
8700+
{
8701+
stubArgument = gtNewIconNode(0, TYP_I_IMPL);
8702+
}
8703+
8704+
GenTree* tree;
8705+
8706+
GenTreeCall::Use* args = gtNewCallArgs(pInvokeFrameVar, gtNewIconEmbMethHndNode(info.compMethodHnd), stubArgument);
86918707

8692-
tree = gtNewHelperCallNode(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, TYP_VOID, gtNewCallArgs(tree));
8708+
tree = gtNewHelperCallNode(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, TYP_VOID, args);
86938709

86948710
fgEnsureFirstBBisScratch();
86958711

src/coreclr/jit/importer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17334,6 +17334,12 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode)
1733417334
{
1733517335
op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF));
1733617336
}
17337+
#endif
17338+
#if defined(TARGET_X86)
17339+
else if (info.compCallConv != CorInfoCallConvExtension::Managed)
17340+
{
17341+
op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF));
17342+
}
1733717343
#endif
1733817344
else
1733917345
{

src/coreclr/jit/lclvars.cpp

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,29 @@ void Compiler::lvaInitTypeRef()
235235
//-------------------------------------------------------------------------
236236

237237
InitVarDscInfo varDscInfo;
238-
varDscInfo.Init(lvaTable, hasRetBuffArg);
238+
#ifdef TARGET_X86
239+
// x86 unmanaged calling conventions limit the number of registers supported
240+
// for accepting arguments. As a result, we need to modify the number of registers
241+
// when we emit a method with an unmanaged calling convention.
242+
switch (info.compCallConv)
243+
{
244+
case CorInfoCallConvExtension::Thiscall:
245+
// In thiscall the this parameter goes into a register.
246+
varDscInfo.Init(lvaTable, hasRetBuffArg, 1, 0);
247+
break;
248+
case CorInfoCallConvExtension::C:
249+
case CorInfoCallConvExtension::Stdcall:
250+
varDscInfo.Init(lvaTable, hasRetBuffArg, 0, 0);
251+
break;
252+
case CorInfoCallConvExtension::Managed:
253+
case CorInfoCallConvExtension::Fastcall:
254+
default:
255+
varDscInfo.Init(lvaTable, hasRetBuffArg, MAX_REG_ARG, MAX_FLOAT_REG_ARG);
256+
break;
257+
}
258+
#else
259+
varDscInfo.Init(lvaTable, hasRetBuffArg, MAX_REG_ARG, MAX_FLOAT_REG_ARG);
260+
#endif
239261

240262
lvaInitArgs(&varDscInfo);
241263

@@ -513,14 +535,16 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf
513535
info.compRetBuffArg = varDscInfo->varNum;
514536
varDsc->lvType = TYP_BYREF;
515537
varDsc->lvIsParam = 1;
516-
varDsc->lvIsRegArg = 1;
538+
varDsc->lvIsRegArg = 0;
517539

518540
if (useFixedRetBufReg && hasFixedRetBuffReg())
519541
{
542+
varDsc->lvIsRegArg = 1;
520543
varDsc->SetArgReg(theFixedRetBuffReg());
521544
}
522-
else
545+
else if (varDscInfo->canEnreg(TYP_INT))
523546
{
547+
varDsc->lvIsRegArg = 1;
524548
unsigned retBuffArgNum = varDscInfo->allocRegArg(TYP_INT);
525549
varDsc->SetArgReg(genMapIntRegArgNumToRegNum(retBuffArgNum));
526550
}
@@ -557,10 +581,10 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf
557581
}
558582
#endif // FEATURE_SIMD
559583

560-
assert(isValidIntArgReg(varDsc->GetArgReg()));
584+
assert(!varDsc->lvIsRegArg || isValidIntArgReg(varDsc->GetArgReg()));
561585

562586
#ifdef DEBUG
563-
if (verbose)
587+
if (varDsc->lvIsRegArg && verbose)
564588
{
565589
printf("'__retBuf' passed in register %s\n", getRegName(varDsc->GetArgReg()));
566590
}
@@ -591,7 +615,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
591615

592616
#if defined(TARGET_X86)
593617
// Only (some of) the implicit args are enregistered for varargs
594-
varDscInfo->maxIntRegArgNum = info.compIsVarArgs ? varDscInfo->intRegArgNum : MAX_REG_ARG;
618+
if (info.compIsVarArgs)
619+
{
620+
varDscInfo->maxIntRegArgNum = varDscInfo->intRegArgNum;
621+
}
595622
#elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
596623
// On System V type environment the float registers are not indexed together with the int ones.
597624
varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum;
@@ -5345,7 +5372,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
53455372
This is all relative to our Virtual '0'
53465373
*/
53475374

5348-
if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
5375+
if (info.compArgOrder == Target::ARG_ORDER_L2R)
53495376
{
53505377
argOffs = compArgSize;
53515378
}
@@ -5357,9 +5384,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
53575384
noway_assert(compArgSize >= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
53585385
#endif
53595386

5360-
#ifdef TARGET_X86
5361-
argOffs -= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
5362-
#endif
5387+
if (info.compArgOrder == Target::ARG_ORDER_L2R)
5388+
{
5389+
argOffs -= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
5390+
}
53635391

53645392
// Update the arg initial register locations.
53655393
lvaUpdateArgsWithInitialReg();
@@ -5398,11 +5426,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
53985426
if (info.compRetBuffArg != BAD_VAR_NUM)
53995427
{
54005428
noway_assert(lclNum == info.compRetBuffArg);
5401-
noway_assert(lvaTable[lclNum].lvIsRegArg);
5402-
#ifndef TARGET_X86
54035429
argOffs =
54045430
lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
5405-
#endif // TARGET_X86
54065431
lclNum++;
54075432
}
54085433

@@ -5553,7 +5578,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
55535578
noway_assert(lclNum < info.compArgsCount);
55545579
noway_assert(argSize);
55555580

5556-
if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
5581+
if (info.compArgOrder == Target::ARG_ORDER_L2R)
55575582
{
55585583
argOffs -= argSize;
55595584
}
@@ -5621,7 +5646,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
56215646
}
56225647
}
56235648

5624-
if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
5649+
if (info.compArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
56255650
{
56265651
argOffs += argSize;
56275652
}
@@ -5646,7 +5671,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
56465671
noway_assert(lclNum < info.compArgsCount);
56475672
noway_assert(argSize);
56485673

5649-
if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
5674+
if (info.compArgOrder == Target::ARG_ORDER_L2R)
56505675
{
56515676
argOffs -= argSize;
56525677
}
@@ -5925,7 +5950,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
59255950
}
59265951
}
59275952

5928-
if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
5953+
if (info.compArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
59295954
{
59305955
argOffs += argSize;
59315956
}

src/coreclr/jit/register_arg_convention.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ struct InitVarDscInfo
3333

3434
public:
3535
// set to initial values
36-
void Init(LclVarDsc* lvaTable, bool _hasRetBufArg)
36+
void Init(LclVarDsc* lvaTable, bool _hasRetBufArg, unsigned _maxIntRegArgNum, unsigned _maxFloatRegArgNum)
3737
{
3838
hasRetBufArg = _hasRetBufArg;
3939
varDsc = &lvaTable[0]; // the first argument LclVar 0
4040
varNum = 0; // the first argument varNum 0
4141
intRegArgNum = 0;
4242
floatRegArgNum = 0;
43-
maxIntRegArgNum = MAX_REG_ARG;
44-
maxFloatRegArgNum = MAX_FLOAT_REG_ARG;
43+
maxIntRegArgNum = _maxIntRegArgNum;
44+
maxFloatRegArgNum = _maxFloatRegArgNum;
4545

4646
#ifdef TARGET_ARM
4747
fltArgSkippedRegMask = RBM_NONE;

0 commit comments

Comments
 (0)