Skip to content

Commit 49b26e4

Browse files
authored
[clr-interp] Implement cached virtual/interface dispatch (#123815)
We create a simple hashtable (InterpDispatchCache) that maps DispatchToken + target MT to the target method to be called. The cache is similar to the `DispatchCache` used by VSD. It holds a single mapping per index; when a collision happens, the entry is replaced with the new one. Replaced entries are freed during GC. The expectation is that there will be few collisions, given that only a subset of methods are being interpreted. This makes a microbenchmark from the suite 4x faster.
1 parent 8611948 commit 49b26e4

9 files changed

Lines changed: 268 additions & 14 deletions

File tree

src/coreclr/interpreter/compiler.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5182,11 +5182,18 @@ void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* pConstrainedToken, bool re
51825182
break;
51835183
}
51845184
case CORINFO_VIRTUALCALL_VTABLE:
5185+
{
51855186
// Traditional virtual call. In theory we could optimize this to using the vtable
51865187
AddIns(tailcall ? INTOP_CALLVIRT_TAIL : INTOP_CALLVIRT);
51875188
m_pLastNewIns->data[0] = GetDataItemIndex(callInfo.hMethod);
5189+
// Reserved slots for caching DispatchToken and its hash
5190+
m_pLastNewIns->data[1] = GetNewDataItemIndex(nullptr);
5191+
int32_t secondCache = GetNewDataItemIndex(nullptr);
5192+
#ifdef DEBUG
5193+
assert(secondCache == (m_pLastNewIns->data[1] + 1));
5194+
#endif
51885195
break;
5189-
5196+
}
51905197
case CORINFO_VIRTUALCALL_LDVIRTFTN:
51915198
if ((callInfo.sig.sigInst.methInstCount != 0) || (m_compHnd->getClassAttribs(m_compHnd->getMethodClass(callInfo.hMethod)) & CORINFO_FLG_SHAREDINST))
51925199
{
@@ -5217,6 +5224,11 @@ void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* pConstrainedToken, bool re
52175224
{
52185225
AddIns(tailcall ? INTOP_CALLVIRT_TAIL : INTOP_CALLVIRT);
52195226
m_pLastNewIns->data[0] = GetDataItemIndex(callInfo.hMethod);
5227+
m_pLastNewIns->data[1] = GetNewDataItemIndex(nullptr);
5228+
int32_t secondCache = GetNewDataItemIndex(nullptr);
5229+
#ifdef DEBUG
5230+
assert(secondCache == (m_pLastNewIns->data[1] + 1));
5231+
#endif
52205232
}
52215233
break;
52225234

src/coreclr/interpreter/compiler.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,13 @@ class InterpDataItemIndexMap
105105
_dataItems = dataItems;
106106
}
107107

108+
// Allocates a slot in the data items that is not shared with other opcodes
// Typically used for caching data at runtime.
// Returns the index of the newly allocated slot; `data` is its initial value
// (callers pass nullptr to reserve an empty slot filled in at execution time).
int32_t GetNewDataItemIndex(void* data)
{
    // Unconditional Add: no deduplication, so each call yields a distinct slot.
    return _dataItems->Add(data);
}
114+
108115
int32_t GetDataItemIndex(const InterpGenericLookup& lookup)
109116
{
110117
const size_t sizeOfFieldsConcatenated = sizeof(InterpGenericLookup::offsets) +
@@ -687,6 +694,10 @@ class InterpCompiler
687694
{
688695
return m_genericLookupToDataItemIndex.GetDataItemIndex(data);
689696
}
697+
// Allocates a fresh (non-shared) data item slot; see
// InterpDataItemIndexMap::GetNewDataItemIndex for semantics.
int32_t GetNewDataItemIndex(void* data)
{
    return m_genericLookupToDataItemIndex.GetNewDataItemIndex(data);
}
690701

691702
void* GetDataItemAtIndex(int32_t index);
692703
void* GetAddrOfDataItemAtIndex(int32_t index);

src/coreclr/interpreter/inc/intops.def

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ OPDEF(INTOP_CALL_NULLCHECK, "call.nullcheck", 4, 1, 1, InterpOpMethodHandle)
379379
OPDEF(INTOP_CALLDELEGATE, "call.delegate", 5, 1, 1, InterpOpMethodHandle)
380380
OPDEF(INTOP_CALLDELEGATE_TAIL, "call.delegate.tail", 4, 1, 1, InterpOpMethodHandle)
381381
OPDEF(INTOP_CALLI, "calli", 6, 1, 2, InterpOpLdPtr)
382-
OPDEF(INTOP_CALLVIRT, "callvirt", 4, 1, 1, InterpOpMethodHandle)
382+
OPDEF(INTOP_CALLVIRT, "callvirt", 5, 1, 1, InterpOpMethodHandle)
383383
OPDEF(INTOP_CALL_PINVOKE, "call.pinvoke", 6, 1, 1, InterpOpMethodHandle) // inlined (no marshaling wrapper) pinvokes only
384384
OPDEF(INTOP_NEWOBJ, "newobj", 5, 1, 1, InterpOpMethodHandle)
385385
OPDEF(INTOP_NEWOBJ_GENERIC, "newobj.generic", 6, 1, 2, InterpOpMethodHandle)
@@ -388,7 +388,7 @@ OPDEF(INTOP_NEWOBJ_VT, "newobj.vt", 5, 1, 1, InterpOpMethodHandle)
388388
// Tail calls
389389
OPDEF(INTOP_CALL_TAIL, "call.tail", 4, 1, 1, InterpOpMethodHandle)
390390
OPDEF(INTOP_CALLI_TAIL, "calli.tail", 6, 1, 2, InterpOpLdPtr)
391-
OPDEF(INTOP_CALLVIRT_TAIL, "callvirt.tail", 4, 1, 1, InterpOpMethodHandle)
391+
OPDEF(INTOP_CALLVIRT_TAIL, "callvirt.tail", 5, 1, 1, InterpOpMethodHandle)
392392

393393
// The following helper call instructions exist in 2 variants, one for normal methods, and one for cases where a shared generic lookup is needed.
394394
// In the case where a shared generic lookup is needed an extra argument is passed as an svar, which is a pointer to the generic context.

src/coreclr/vm/contractimpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,10 @@ struct DispatchToken
403403
LIMITED_METHOD_CONTRACT;
404404
return !(m_token == INVALID_TOKEN);
405405
}
406+
407+
//------------------------------------------------------------------------
408+
// Returns a hash of the token suitable for use in dispatch caches.
409+
UINT16 GetHash() const;
406410
}; // struct DispatchToken
407411

408412
// DispatchToken.m_token should be the only field of DispatchToken.

src/coreclr/vm/interpexec.cpp

Lines changed: 209 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,184 @@
1616
#include <limits>
1717
#include <functional>
1818

19+
struct InterpDispatchCacheEntry
20+
{
21+
// MethodTable of the calling object
22+
MethodTable* pMT;
23+
size_t dispatchToken;
24+
// Resolved target MethodDesc
25+
MethodDesc* pTargetMD;
26+
// Used for linking dead entries for cleanup during GC
27+
InterpDispatchCacheEntry* pNextDead;
28+
29+
InterpDispatchCacheEntry(MethodTable* pMT, size_t dispatchToken, MethodDesc* pTargetMD)
30+
{
31+
this->pMT = pMT;
32+
this->dispatchToken = dispatchToken;
33+
this->pTargetMD = pTargetMD;
34+
pNextDead = nullptr;
35+
}
36+
};
37+
38+
// Number of index bits in the dispatch cache (table has 2^12 = 4096 buckets).
#define DISPATCH_CACHE_BITS 12
// Number of buckets in the direct-mapped cache table.
#define DISPATCH_CACHE_SIZE (1 << DISPATCH_CACHE_BITS)
// Mask applied to hashes to produce a valid bucket index.
#define DISPATCH_CACHE_MASK (DISPATCH_CACHE_SIZE - 1)
41+
42+
// A simple hash table that caches virtual method dispatch results.
// Maps (DispatchToken, MethodTable*) to MethodDesc*.
// The table is direct-mapped: each bucket holds at most one entry, and a
// colliding Insert unconditionally replaces the previous entry. Displaced
// entries are kept on a dead list and freed at a GC sync point, when no
// reader can still hold a pointer to them.
struct InterpDispatchCache
{
    InterpDispatchCacheEntry* m_cache[DISPATCH_CACHE_SIZE];
    // List of dead entries to be freed at GC time
    InterpDispatchCacheEntry* m_pDeadList;

    // Returns the cached target method for (dispatchToken, pMT), or NULL on a miss.
    // Lock-free; safe to call concurrently with Insert.
    MethodDesc* Lookup(size_t dispatchToken, void* pMT, uint16_t dispatchTokenHash)
    {
        LIMITED_METHOD_CONTRACT;

        size_t idx = Hash(dispatchToken, pMT, dispatchTokenHash);

        InterpDispatchCacheEntry* pEntry = VolatileLoadWithoutBarrier(&m_cache[idx]);
        // Data dependency ensures field reads are ordered after loading of `pEntry`
        // The entry struct is immutable once created, so these reads are safe
        if (pEntry != nullptr && pEntry->pMT == pMT && pEntry->dispatchToken == dispatchToken)
            return pEntry->pTargetMD;

        return NULL;
    }

    // Publishes a new (dispatchToken, pMT) -> pTargetMD mapping, replacing
    // whatever entry previously occupied the bucket. Allocation failure is
    // tolerated silently: the cache is an optimization, a miss just means the
    // caller resolves the dispatch again.
    void Insert(size_t dispatchToken, MethodTable* pMT, MethodDesc* pTargetMD, uint16_t dispatchTokenHash)
    {
        LIMITED_METHOD_CONTRACT;

        size_t idx = Hash(dispatchToken, pMT, dispatchTokenHash);

        InterpDispatchCacheEntry* pNewEntry = new (nothrow) InterpDispatchCacheEntry(pMT, dispatchToken, pTargetMD);
        if (pNewEntry == nullptr)
            return;

        // The interlocked exchange publishes the entry with (at least) release
        // semantics, so its fields hold correct values once it is visible to
        // other threads. The exchange is unconditional, so we always own the
        // old entry for freeing.
        InterpDispatchCacheEntry* pOldEntry = InterlockedExchangeT(&m_cache[idx], pNewEntry);

        if (pOldEntry != nullptr)
            AddToDeadList(pOldEntry);
    }

    // Called during GC sync point to free dead entries
    // At this point, no other threads are running, so it is safe to free
    void ReclaimDeadEntries()
    {
        LIMITED_METHOD_CONTRACT;

        InterpDispatchCacheEntry* pDeadList = VolatileLoadWithoutBarrier(&m_pDeadList);

        while (pDeadList != nullptr)
        {
            InterpDispatchCacheEntry* pNext = pDeadList->pNextDead;
            delete pDeadList;
            pDeadList = pNext;
        }

        // Reset the head; no CAS needed because no mutator can race with us here.
        VolatileStoreWithoutBarrier(&m_pDeadList, (InterpDispatchCacheEntry*)nullptr);
    }

    // Add an entry to the dead list for later cleanup
    // Standard lock-free list push: link the entry to the current head, then
    // CAS the head; retry if another thread pushed concurrently.
    void AddToDeadList(InterpDispatchCacheEntry* pEntry)
    {
        LIMITED_METHOD_CONTRACT;

        InterpDispatchCacheEntry* pOldHead;
        do
        {
            pOldHead = VolatileLoadWithoutBarrier(&m_pDeadList);
            pEntry->pNextDead = pOldHead;
        }
        while (InterlockedCompareExchangeT(&m_pDeadList, pEntry, pOldHead) != pOldHead);
    }

    // Same as VSD DispatchCache's HashToken + HashMT
    // NOTE: dispatchToken itself is unused; only its precomputed hash
    // (dispatchTokenHash) participates, XORed with a hash of the MethodTable
    // pointer. Result always fits in DISPATCH_CACHE_BITS bits due to masking.
    static uint16_t Hash(size_t dispatchToken, void* pMT, uint16_t dispatchTokenHash)
    {
        LIMITED_METHOD_CONTRACT;

        size_t mtHash = (size_t)pMT;
        mtHash = (((mtHash >> DISPATCH_CACHE_BITS) + mtHash) >> LOG2_PTRSIZE) & DISPATCH_CACHE_MASK;

        uint16_t hash = (uint16_t)mtHash;
        hash ^= (dispatchTokenHash & DISPATCH_CACHE_MASK);

        return hash;
    }

    // Drops (and frees immediately) every entry whose caller MethodTable or
    // target method belongs to the given LoaderAllocator. Used when a
    // collectible assembly unloads, so the cache never dangles into freed
    // loader memory. Relies on the EE being suspended (see comment below).
    void ClearEntriesForLoaderAllocator(LoaderAllocator* pLoaderAllocator)
    {
        LIMITED_METHOD_CONTRACT;

        for (size_t i = 0; i < DISPATCH_CACHE_SIZE; i++)
        {
            InterpDispatchCacheEntry* pEntry = VolatileLoadWithoutBarrier(&m_cache[i]);
            if (pEntry == nullptr)
                continue;

            if (pEntry->pMT->GetLoaderAllocator() == pLoaderAllocator ||
                pEntry->pTargetMD->GetLoaderAllocator() == pLoaderAllocator)
            {
                VolatileStoreWithoutBarrier(&m_cache[i], (InterpDispatchCacheEntry*)nullptr);
                // Given the EE is suspended, we can free the entry without worrying about races
                delete pEntry;
            }
        }
    }
};
150+
151+
// Global interpreter dispatch cache instance (single process-wide table;
// zero-initialized as a static, so all buckets start empty).
static InterpDispatchCache g_InterpDispatchCache;
153+
154+
// Called during GC, when we are guaranteed no entry is being used by any thread.
// Frees cache entries displaced by Insert since the previous GC sync point.
void InterpDispatchCache_ReclaimAll()
{
    CONTRACTL
    {
        NOTHROW;
        GC_NOTRIGGER;
        MODE_COOPERATIVE;
        // Should only be called during a GC suspension
        PRECONDITION(Debug_IsLockedViaThreadSuspension());
    }
    CONTRACTL_END;

    g_InterpDispatchCache.ReclaimDeadEntries();
}
169+
170+
// Clear entries that reference types/methods from the given LoaderAllocator.
// Called during collectible assembly unload when the EE is suspended.
// Thin global wrapper so callers (e.g. loaderallocator.cpp) need not see the
// InterpDispatchCache type.
void InterpDispatchCache_ClearForLoaderAllocator(LoaderAllocator* pLoaderAllocator)
{
    g_InterpDispatchCache.ClearEntriesForLoaderAllocator(pLoaderAllocator);
}
176+
177+
// Builds the DispatchToken used as the dispatch-cache key for a virtual or
// interface call to pMD, encoded as a size_t.
static size_t CreateDispatchTokenForMethod(MethodDesc* pMD)
{
    WRAPPER_NO_CONTRACT;

    uint32_t slotNumber = pMD->GetSlot();

    if (!pMD->IsInterface())
    {
        // For non-interface virtual methods, use TYPE_ID_THIS_CLASS
        return DispatchToken::CreateDispatchToken(slotNumber).To_SIZE_T();
    }

    // For interface methods, the token also encodes the interface's TypeID
    MethodTable* pInterfaceMT = pMD->GetMethodTable();
    return DispatchToken::CreateDispatchToken(pInterfaceMT->GetTypeID(), slotNumber).To_SIZE_T();
}
196+
19197
#ifdef TARGET_WASM
20198
// Unused on WASM
21199
#define SAVE_THE_LOWEST_SP do {} while (0)
@@ -2686,15 +2864,38 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
26862864
OBJECTREF *pThisArg = LOCAL_VAR_ADDR(callArgsOffset, OBJECTREF);
26872865
NULL_CHECK(*pThisArg);
26882866

2689-
// Interpreter-TODO
2690-
// This needs to be optimized, not operating at MethodDesc level, rather with ftnptr
2691-
// slots containing the interpreter IR pointer
2692-
targetMethod = CallWithSEHWrapper(
2693-
[&pMD, &pThisArg]() {
2694-
return pMD->GetMethodDescOfVirtualizedCode(pThisArg, pMD->GetMethodTable());
2695-
});
2867+
MethodTable *pObjMT = (*pThisArg)->GetMethodTable();
26962868

2697-
ip += 4;
2869+
// Interpreter-FIXME: It would be nice to have these caches initialized at compile time instead
2870+
// Obtain the cached dispatch token or initialize it
2871+
size_t dispatchToken = (size_t)VolatileLoadWithoutBarrier(&pMethod->pDataItems[ip[4]]);
2872+
if (dispatchToken == 0)
2873+
{
2874+
dispatchToken = CreateDispatchTokenForMethod(pMD);
2875+
VolatileStoreWithoutBarrier(&pMethod->pDataItems[ip[4]], (void*)dispatchToken);
2876+
}
2877+
// The token hash is cached in the data item immediately following the dispatchToken
2878+
size_t dispatchTokenHash = (size_t)VolatileLoadWithoutBarrier(&pMethod->pDataItems[ip[4] + 1]);
2879+
if (dispatchTokenHash == 0)
2880+
{
2881+
dispatchTokenHash = DispatchToken::From_SIZE_T(dispatchToken).GetHash();
2882+
VolatileStoreWithoutBarrier(&pMethod->pDataItems[ip[4] + 1], (void*)dispatchTokenHash);
2883+
}
2884+
2885+
// Try cache lookup first
2886+
targetMethod = g_InterpDispatchCache.Lookup(dispatchToken, pObjMT, (uint16_t)dispatchTokenHash);
2887+
2888+
if (targetMethod == NULL)
2889+
{
2890+
// miss, resolve the virtual method and cache it
2891+
targetMethod = CallWithSEHWrapper(
2892+
[&pMD, &pThisArg]() {
2893+
return pMD->GetMethodDescOfVirtualizedCode(pThisArg, pMD->GetMethodTable());
2894+
});
2895+
g_InterpDispatchCache.Insert(dispatchToken, pObjMT, targetMethod, (uint16_t)dispatchTokenHash);
2896+
}
2897+
2898+
ip += 5;
26982899
goto CALL_INTERP_METHOD;
26992900
}
27002901

@@ -2852,7 +3053,6 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
28523053
[&targetMethod, &pThisArg]() {
28533054
return targetMethod->GetMethodDescOfVirtualizedCode(pThisArg, targetMethod->GetMethodTable());
28543055
});
2855-
28563056
}
28573057
else
28583058
{

src/coreclr/vm/interpexec.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,4 +125,7 @@ struct UnmanagedMethodWithTransitionParam
125125
PCODE callTarget;
126126
};
127127

128+
// Frees dispatch-cache entries displaced since the last GC.
// Must only be called at a GC sync point (see interpexec.cpp).
void InterpDispatchCache_ReclaimAll();
// Drops dispatch-cache entries that reference the given LoaderAllocator.
// Called during collectible assembly unload while the EE is suspended.
void InterpDispatchCache_ClearForLoaderAllocator(LoaderAllocator* pLoaderAllocator);
130+
128131
#endif // _INTERPEXEC_H_

src/coreclr/vm/loaderallocator.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
#endif
1414
#include "comcallablewrapper.h"
1515

16+
#ifdef FEATURE_INTERPRETER
17+
#include "interpexec.h"
18+
#endif
19+
1620
//#define ENABLE_LOG_LOADER_ALLOCATOR_CLEANUP 1
1721

1822
#define STUBMANAGER_RANGELIST(stubManager) (stubManager::g_pManager->GetRangeList())
@@ -639,6 +643,10 @@ void LoaderAllocator::GCLoaderAllocators(LoaderAllocator* pOriginalLoaderAllocat
639643
ExecutionManager::Unload(pDomainLoaderAllocatorDestroyIterator);
640644
pDomainLoaderAllocatorDestroyIterator->UninitVirtualCallStubManager();
641645

646+
#ifdef FEATURE_INTERPRETER
647+
InterpDispatchCache_ClearForLoaderAllocator(pDomainLoaderAllocatorDestroyIterator);
648+
#endif
649+
642650
// TODO: Do we really want to perform this on each LoaderAllocator?
643651
MethodTable::ClearMethodDataCache();
644652

src/coreclr/vm/syncclean.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
#include "virtualcallstub.h"
99
#include "threadsuspend.h"
1010

11+
#ifdef FEATURE_INTERPRETER
12+
#include "interpexec.h"
13+
#endif
14+
1115
VolatilePtr<Bucket> SyncClean::m_HashMap = NULL;
1216
VolatilePtr<EEHashEntry*> SyncClean::m_EEHashTable;
1317

@@ -95,4 +99,9 @@ void SyncClean::CleanUp ()
9599

96100
// Give others we want to reclaim during the GC sync point a chance to do it
97101
VirtualCallStubManager::ReclaimAll();
102+
103+
#ifdef FEATURE_INTERPRETER
104+
// Reclaim dead interpreter dispatch cache entries
105+
InterpDispatchCache_ReclaimAll();
106+
#endif
98107
}

0 commit comments

Comments
 (0)