Skip to content

Commit b7df9ff

Browse files
[release/6.0] Fix stress issues around multiple threads throwing the same exceptions (#57959)
* Fix stress issues around multiple threads throwing the same exceptions - The watson codebase manipulates the state of the following fields on Exception in a lock-free manner if there are multiple threads throwing the same exception - _stackTrace - _stackTraceString - _remoteStackTraceString - _watsonBuckets - _ipForWatsonBuckets - The designed behavior is that these apis should "mostly" be correct, but as they are only used for fatal shutdown scenarios, exact correctness is not required for correct program execution - However, there are some race conditions that have been seen recently in testing 1. In some circumstances, the value will be explicitly read multiple times, where the first read is to check for NULL, and then a second read is to read the actual value and use it in some way. In the presence of a race which sets the value to NULL, the runtime can crash. To fix this, the code is refactored, in cases which could lead to crashes, to perform a single read and carry the read value to where it needs to go. 2. Since the C++ memory model generally allows a single read written in C++ to be converted into multiple reads if the compiler can prove that the read does not cross a lock/memory barrier, it is possible for the C++ compiler to inject multiple reads where the logic naturally only has 1. The fix for this is to utilize the VolatileLoadWithoutBarrier api to specify that a read should happen once in cases where it might cause a problem. Finally, test45929 tended to fail in GC stress as it would take a very long time to run under GC stress or on some hardware. Adjust it so that it shuts down after about 2.5 minutes. - Do this instead of disabling running under gcstress as there is evidence that there may have been bugs seen during runs under gcstress. Fixes #46803 * Rename as per suggestion Co-authored-by: David Wrighton <davidwr@microsoft.com>
1 parent 8ddb633 commit b7df9ff

7 files changed

Lines changed: 106 additions & 32 deletions

File tree

src/coreclr/debug/daccess/request.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3842,11 +3842,11 @@ HRESULT ClrDataAccess::GetClrWatsonBucketsWorker(Thread * pThread, GenericModeBl
38423842
if (oThrowable != NULL)
38433843
{
38443844
// Does the throwable have buckets?
3845-
if (((EXCEPTIONREF)oThrowable)->AreWatsonBucketsPresent())
3845+
U1ARRAYREF refWatsonBucketArray = ((EXCEPTIONREF)oThrowable)->GetWatsonBucketReference();
3846+
if (refWatsonBucketArray != NULL)
38463847
{
38473848
// Get the watson buckets from the throwable for non-preallocated
38483849
// exceptions
3849-
U1ARRAYREF refWatsonBucketArray = ((EXCEPTIONREF)oThrowable)->GetWatsonBucketReference();
38503850
pBuckets = dac_cast<PTR_VOID>(refWatsonBucketArray->GetDataPtr());
38513851
}
38523852
else

src/coreclr/vm/excep.cpp

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9096,6 +9096,7 @@ void SetupWatsonBucketsForUEF(BOOL fUseLastThrownObject)
90969096
struct
90979097
{
90989098
OBJECTREF oThrowable;
9099+
U1ARRAYREF oBuckets;
90999100
} gc;
91009101
ZeroMemory(&gc, sizeof(gc));
91019102
GCPROTECT_BEGIN(gc);
@@ -9197,9 +9198,10 @@ void SetupWatsonBucketsForUEF(BOOL fUseLastThrownObject)
91979198
SetupWatsonBucketsForNonPreallocatedExceptions(gc.oThrowable);
91989199
}
91999200

9200-
if (((EXCEPTIONREF)gc.oThrowable)->AreWatsonBucketsPresent())
9201+
gc.oBuckets = ((EXCEPTIONREF)gc.oThrowable)->GetWatsonBucketReference();
9202+
if (gc.oBuckets != NULL)
92019203
{
9202-
pUEWatsonBucketTracker->CopyBucketsFromThrowable(gc.oThrowable);
9204+
pUEWatsonBucketTracker->CopyBuckets(gc.oBuckets);
92039205
}
92049206

92059207
if (pUEWatsonBucketTracker->RetrieveWatsonBuckets() == NULL)
@@ -9519,6 +9521,7 @@ BOOL SetupWatsonBucketsForFailFast(EXCEPTIONREF refException)
95199521
{
95209522
OBJECTREF refException;
95219523
OBJECTREF oInnerMostExceptionThrowable;
9524+
U1ARRAYREF oBuckets;
95229525
} gc;
95239526
ZeroMemory(&gc, sizeof(gc));
95249527
GCPROTECT_BEGIN(gc);
@@ -9669,10 +9672,11 @@ BOOL SetupWatsonBucketsForFailFast(EXCEPTIONREF refException)
96699672
}
96709673

96719674
// If it has the buckets, copy them over to the current Watson bucket tracker
9672-
if (((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->AreWatsonBucketsPresent())
9675+
gc.oBuckets = ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->GetWatsonBucketReference();
9676+
if (gc.oBuckets != NULL)
96739677
{
96749678
pUEWatsonBucketTracker->ClearWatsonBucketDetails();
9675-
pUEWatsonBucketTracker->CopyBucketsFromThrowable(gc.oInnerMostExceptionThrowable);
9679+
pUEWatsonBucketTracker->CopyBuckets(gc.oBuckets);
96769680
if (pUEWatsonBucketTracker->RetrieveWatsonBuckets() != NULL)
96779681
{
96789682
LOG((LF_EH, LL_INFO1000, "SetupWatsonBucketsForFailFast - Got watson buckets from regular innermost exception.\n"));
@@ -9711,11 +9715,12 @@ BOOL SetupWatsonBucketsForFailFast(EXCEPTIONREF refException)
97119715
SetupWatsonBucketsForNonPreallocatedExceptions(gc.refException);
97129716
}
97139717

9714-
if (((EXCEPTIONREF)gc.refException)->AreWatsonBucketsPresent())
9718+
gc.oBuckets = ((EXCEPTIONREF)gc.refException)->GetWatsonBucketReference();
9719+
if (gc.oBuckets != NULL)
97159720
{
97169721
// Copy the buckets to the current watson bucket tracker
97179722
pUEWatsonBucketTracker->ClearWatsonBucketDetails();
9718-
pUEWatsonBucketTracker->CopyBucketsFromThrowable(gc.refException);
9723+
pUEWatsonBucketTracker->CopyBuckets(gc.oBuckets);
97199724
if (pUEWatsonBucketTracker->RetrieveWatsonBuckets() != NULL)
97209725
{
97219726
LOG((LF_EH, LL_INFO1000, "SetupWatsonBucketsForFailFast - Watson buckets copied from the exception object.\n"));
@@ -9950,6 +9955,9 @@ void SetupInitialThrowBucketDetails(UINT_PTR adjustedIp)
99509955
EX_TRY
99519956
{
99529957
CopyWatsonBucketsToThrowable(pUEWatsonBucketTracker->RetrieveWatsonBuckets());
9958+
9959+
// Technically this assert can fail, as another thread could clear the buckets after
9960+
// CopyWatsonBucketsToThrowable but before the assert runs, but it is very unlikely.
99539961
_ASSERTE(((EXCEPTIONREF)gc.oCurrentThrowable)->AreWatsonBucketsPresent());
99549962
}
99559963
EX_CATCH
@@ -10686,16 +10694,15 @@ void EHWatsonBucketTracker::Init()
1068610694

1068710695
// This method copies the bucketing details from the specified Watson
1068810696
// bucket array to the current Watson Bucket tracker.
10689-
void EHWatsonBucketTracker::CopyBucketsFromThrowable(OBJECTREF oThrowable)
10697+
void EHWatsonBucketTracker::CopyBuckets(U1ARRAYREF oBuckets)
1069010698
{
1069110699
#ifndef DACCESS_COMPILE
1069210700
CONTRACTL
1069310701
{
1069410702
NOTHROW;
1069510703
GC_NOTRIGGER;
1069610704
MODE_ANY;
10697-
PRECONDITION(oThrowable != NULL);
10698-
PRECONDITION(((EXCEPTIONREF)oThrowable)->AreWatsonBucketsPresent());
10705+
PRECONDITION(oBuckets != NULL);
1069910706
PRECONDITION(IsWatsonEnabled());
1070010707
}
1070110708
CONTRACTL_END;
@@ -10704,16 +10711,16 @@ void EHWatsonBucketTracker::CopyBucketsFromThrowable(OBJECTREF oThrowable)
1070410711

1070510712
struct
1070610713
{
10707-
OBJECTREF oFrom;
10714+
U1ARRAYREF oFromBuckets;
1070810715
} _gc;
1070910716

1071010717
ZeroMemory(&_gc, sizeof(_gc));
1071110718
GCPROTECT_BEGIN(_gc);
1071210719

10713-
_gc.oFrom = oThrowable;
10720+
_gc.oFromBuckets = oBuckets;
1071410721

10715-
LOG((LF_EH, LL_INFO1000, "EHWatsonBucketTracker::CopyEHWatsonBucketTracker - Copying bucketing details from throwable (%p) to tracker (%p)\n",
10716-
OBJECTREFToObject(_gc.oFrom), this));
10722+
LOG((LF_EH, LL_INFO1000, "EHWatsonBucketTracker::CopyEHWatsonBucketTracker - Copying bucketing details from bucket (%p) to tracker (%p)\n",
10723+
OBJECTREFToObject(_gc.oFromBuckets), this));
1071710724

1071810725
// Watson bucket is a "GenericModeBlock" type. Set up an empty GenericModeBlock
1071910726
// to hold the bucket parameters.
@@ -10728,8 +10735,7 @@ void EHWatsonBucketTracker::CopyBucketsFromThrowable(OBJECTREF oThrowable)
1072810735
else
1072910736
{
1073010737
// Get the raw array data pointer
10731-
U1ARRAYREF refWatsonBucketArray = ((EXCEPTIONREF)_gc.oFrom)->GetWatsonBucketReference();
10732-
PTR_VOID pRawWatsonBucketArray = dac_cast<PTR_VOID>(refWatsonBucketArray->GetDataPtr());
10738+
PTR_VOID pRawWatsonBucketArray = dac_cast<PTR_VOID>(_gc.oFromBuckets->GetDataPtr());
1073310739

1073410740
// Copy over the details to our new allocation
1073510741
memcpyNoGCRefs(pgmb, pRawWatsonBucketArray, sizeof(GenericModeBlock));

src/coreclr/vm/exstatecommon.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ class EHWatsonBucketTracker
483483
EHWatsonBucketTracker();
484484
void Init();
485485
void CopyEHWatsonBucketTracker(const EHWatsonBucketTracker& srcTracker);
486-
void CopyBucketsFromThrowable(OBJECTREF oThrowable);
486+
void CopyBuckets(U1ARRAYREF oBuckets);
487487
void SaveIpForWatsonBucket(UINT_PTR ip);
488488
UINT_PTR RetrieveWatsonBucketIp();
489489
PTR_VOID RetrieveWatsonBuckets();

src/coreclr/vm/object.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,41 @@ OBJECTREF::OBJECTREF(const OBJECTREF & objref)
10331033
}
10341034

10351035

1036+
//-------------------------------------------------------------
1037+
// VolatileLoadWithoutBarrier constructor
1038+
//-------------------------------------------------------------
1039+
OBJECTREF::OBJECTREF(const OBJECTREF *pObjref, tagVolatileLoadWithoutBarrier tag)
1040+
{
1041+
STATIC_CONTRACT_NOTHROW;
1042+
STATIC_CONTRACT_GC_NOTRIGGER;
1043+
STATIC_CONTRACT_MODE_COOPERATIVE;
1044+
STATIC_CONTRACT_FORBID_FAULT;
1045+
1046+
Object* objrefAsObj = VolatileLoadWithoutBarrier(&pObjref->m_asObj);
1047+
VALIDATEOBJECT(objrefAsObj);
1048+
1049+
// !!! If this assert is fired, there are two possibilities:
1050+
// !!! 1. You are doing a type cast, e.g. *(OBJECTREF*)pObj
1051+
// !!! Instead, you should use ObjectToOBJECTREF(*(Object**)pObj),
1052+
// !!! or ObjectToSTRINGREF(*(StringObject**)pObj)
1053+
// !!! 2. There is a real GC hole here.
1054+
// !!! Either way you need to fix the code.
1055+
_ASSERTE(Thread::IsObjRefValid(pObjref));
1056+
if ((objrefAsObj != 0) &&
1057+
((IGCHeap*)GCHeapUtilities::GetGCHeap())->IsHeapPointer( (BYTE*)this ))
1058+
{
1059+
_ASSERTE(!"Write Barrier violation. Must use SetObjectReference() to assign OBJECTREF's into the GC heap!");
1060+
}
1061+
m_asObj = objrefAsObj;
1062+
1063+
if (m_asObj != 0) {
1064+
ENABLESTRESSHEAP();
1065+
}
1066+
1067+
Thread::ObjectRefNew(this);
1068+
}
1069+
1070+
10361071
//-------------------------------------------------------------
10371072
// To allow NULL to be used as an OBJECTREF.
10381073
//-------------------------------------------------------------

src/coreclr/vm/object.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2418,7 +2418,7 @@ class ExceptionObject : public Object
24182418
OBJECTREF GetInnerException()
24192419
{
24202420
LIMITED_METHOD_DAC_CONTRACT;
2421-
return _innerException;
2421+
return VolatileLoadWithoutBarrierOBJECTREF(&_innerException);
24222422
}
24232423

24242424
// Returns the innermost exception object - equivalent of the
@@ -2431,7 +2431,7 @@ class ExceptionObject : public Object
24312431
OBJECTREF oInnerMostException = NULL;
24322432
OBJECTREF oCurrent = NULL;
24332433

2434-
oCurrent = _innerException;
2434+
oCurrent = GetInnerException();
24352435
while(oCurrent != NULL)
24362436
{
24372437
oInnerMostException = oCurrent;
@@ -2469,7 +2469,7 @@ class ExceptionObject : public Object
24692469
STRINGREF GetRemoteStackTraceString()
24702470
{
24712471
LIMITED_METHOD_DAC_CONTRACT;
2472-
return _remoteStackTraceString;
2472+
return (STRINGREF)VolatileLoadWithoutBarrierOBJECTREF(&_remoteStackTraceString);
24732473
}
24742474

24752475
void SetHelpURL(STRINGREF helpURL)
@@ -2512,15 +2512,15 @@ class ExceptionObject : public Object
25122512
U1ARRAYREF GetWatsonBucketReference()
25132513
{
25142514
LIMITED_METHOD_CONTRACT;
2515-
return _watsonBuckets;
2515+
return (U1ARRAYREF)VolatileLoadWithoutBarrierOBJECTREF(&_watsonBuckets);
25162516
}
25172517

25182518
// This method will return a BOOL to indicate if the
25192519
// watson buckets are present or not.
25202520
BOOL AreWatsonBucketsPresent()
25212521
{
25222522
LIMITED_METHOD_CONTRACT;
2523-
return (_watsonBuckets != NULL)?TRUE:FALSE;
2523+
return (GetWatsonBucketReference() != NULL)?TRUE:FALSE;
25242524
}
25252525

25262526
// This method will save the IP to be used for watson bucketing.
@@ -2545,7 +2545,7 @@ class ExceptionObject : public Object
25452545
{
25462546
LIMITED_METHOD_CONTRACT;
25472547

2548-
return _ipForWatsonBuckets;
2548+
return VolatileLoadWithoutBarrier(&_ipForWatsonBuckets);
25492549
}
25502550

25512551
// README:

src/coreclr/vm/vars.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ class OBJECTREF {
157157
};
158158

159159
public:
160+
enum class tagVolatileLoadWithoutBarrier { tag };
161+
160162
//-------------------------------------------------------------
161163
// Default constructor, for non-initializing declarations:
162164
//
@@ -169,6 +171,12 @@ class OBJECTREF {
169171
//-------------------------------------------------------------
170172
OBJECTREF(const OBJECTREF & objref);
171173

174+
//-------------------------------------------------------------
175+
// Constructs an OBJECTREF from the OBJECTREF pointed to by pObjref,
176+
// reading it with a single volatile load without barrier
177+
//-------------------------------------------------------------
178+
OBJECTREF(const OBJECTREF * pObjref, tagVolatileLoadWithoutBarrier tag);
179+
172180
//-------------------------------------------------------------
173181
// To allow NULL to be used as an OBJECTREF.
174182
//-------------------------------------------------------------
@@ -302,6 +310,7 @@ class REF : public OBJECTREF
302310
#define OBJECTREFToObject(objref) ((objref).operator-> ())
303311
#define ObjectToSTRINGREF(obj) (STRINGREF(obj))
304312
#define STRINGREFToObject(objref) (*( (StringObject**) &(objref) ))
313+
#define VolatileLoadWithoutBarrierOBJECTREF(pObj) (OBJECTREF(pObj, OBJECTREF::tagVolatileLoadWithoutBarrier::tag))
305314

306315
// the while (0) syntax below is to force a trailing semicolon on users of the macro
307316
#define VALIDATEOBJECT(obj) do {if ((obj) != NULL) (obj)->Validate();} while (0)
@@ -316,6 +325,7 @@ class REF : public OBJECTREF
316325
#define OBJECTREFToObject(objref) ((PTR_Object) (objref))
317326
#define ObjectToSTRINGREF(obj) ((PTR_StringObject) (obj))
318327
#define STRINGREFToObject(objref) ((PTR_StringObject) (objref))
328+
#define VolatileLoadWithoutBarrierOBJECTREF(pObj) VolatileLoadWithoutBarrier(pObj)
319329

320330
#endif // _DEBUG_IMPL
321331

src/tests/Regressions/coreclr/GitHub_45929/test45929.cs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System;
5+
using System.Diagnostics;
56
using System.Reflection;
67
using System.Runtime.ExceptionServices;
78
using System.Threading;
@@ -46,18 +47,40 @@ public static void Run()
4647
long progress = 0;
4748
var test = new Test();
4849
const int MaxCount = 1000000;
49-
Parallel.For(
50-
0,
51-
MaxCount,
52-
new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
53-
i =>
50+
int increment = 100;
51+
bool done = false;
52+
Stopwatch stopwatch = new Stopwatch();
53+
stopwatch.Start();
54+
Console.WriteLine($"{DateTime.Now} : {progress * 100D / MaxCount:000.0}% : {stopwatch.ElapsedMilliseconds}");
55+
56+
Action<int> makeProgress = i =>
5457
{
55-
if (Interlocked.Increment(ref progress) % 10000 == 0)
58+
if (done) return;
59+
long newProgress = Interlocked.Increment(ref progress);
60+
if (newProgress % increment == 0)
5661
{
57-
Console.WriteLine($"{DateTime.Now} : {progress * 100D / MaxCount:000.0}%");
62+
int newIncrement = (increment * 3) / 2;
63+
if (newIncrement > 10000)
64+
newIncrement = 10000;
65+
increment = newIncrement;
66+
67+
Console.WriteLine($"{DateTime.Now} : {newProgress * 100D / MaxCount:000.0}% : {stopwatch.ElapsedMilliseconds}");
68+
if (stopwatch.ElapsedMilliseconds > 150000)
69+
{
70+
Console.WriteLine($"Attempting to finish early");
71+
done = true;
72+
}
5873
}
5974
test.Invoke();
60-
});
75+
};
76+
77+
makeProgress(0);
78+
79+
Parallel.For(
80+
1,
81+
MaxCount,
82+
new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
83+
makeProgress);
6184
}
6285

6386
public void Invoke()

0 commit comments

Comments
 (0)