diff --git a/CHANGES.rst b/CHANGES.rst index 2e7904b5..1a1ec148 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,12 +12,16 @@ `_. Patch by fygao-wish. - Fix a leak of a list object when the last reference to a greenlet - was deleted from some other thread than the one it belonged too. For - this to work correctly, you must call a greenlet API like - ``getcurrent()`` before the thread owning the greenlet exits; we - hope to lift this limitation. Note that in some cases this may also - fix leaks of greenlet objects themselves. See `issue 251 <>`_. - + was deleted from some other thread than the one to which it + belonged. For this to work correctly, you must call a greenlet API + like ``getcurrent()`` before the thread owning the greenlet exits; + we hope to lift this limitation. Note that in some cases this may + also fix leaks of greenlet objects themselves. See `issue 251 + `_. +- Python 3.10: Tracing or profiling into a spawned greenlet didn't + work as expected. See `issue 256 + `_, reported + by Joe Rickerby. 1.1.1 (2021-08-06) ================== diff --git a/src/greenlet/greenlet.c b/src/greenlet/greenlet.c index cd22f7dc..f47bbf88 100644 --- a/src/greenlet/greenlet.c +++ b/src/greenlet/greenlet.c @@ -136,6 +136,11 @@ static PyGreenlet* volatile ts_current = NULL; static PyObject* volatile ts_passaround_args = NULL; static PyObject* volatile ts_passaround_kwargs = NULL; +/* Used internally in ``g_switchstack()`` */ +#if GREENLET_USE_CFRAME +static int volatile ts__g_switchstack_use_tracing = 0; +#endif + /***********************************************************/ /* Thread-aware routines, switching global variables when needed */ @@ -195,6 +200,7 @@ green_create_main(void) } gmain->stack_start = (char*)1; gmain->stack_stop = (char*)-1; + /* GetDict() returns a borrowed reference. Make it strong. */ gmain->run_info = dict; Py_INCREF(dict); return gmain; @@ -484,26 +490,37 @@ static int GREENLET_NOINLINE(slp_save_state)(char* stackref) return 0; } +/** + Perform a stack switch according to some global variables + that must be set before calling this function. Those variables + are: + + - ts_current: current greenlet (holds a reference) + - ts_target: greenlet to switch to (weak reference) + - ts_passaround_args: NULL if PyErr_Occurred(), + else a tuple of args sent to ts_target (holds a reference) + - ts_passaround_kwargs: switch kwargs (holds a reference) + + Because the stack switch happens in this function, this function can't use + its own stack (local) variables, set before the switch, and then accessed after the + switch. Global variables beginning with ``ts__g_switchstack`` are used + internally instead. + + On return results are passed via global variables as well: + + - ts_origin: originating greenlet (holds a reference) + - ts_current: current greenlet (holds a reference) + - ts_passaround_args: NULL if PyErr_Occurred(), + else a tuple of args sent to ts_current (holds a reference) + - ts_passaround_kwargs: switch kwargs (holds a reference) + + It is very important that stack switch is 'atomic', i.e. no + calls into other Python code allowed (except very few that + are safe), because global variables are very fragile. +*/ static int g_switchstack(void) { - /* Perform a stack switch according to some global variables - that must be set before: - - ts_current: current greenlet (holds a reference) - - ts_target: greenlet to switch to (weak reference) - - ts_passaround_args: NULL if PyErr_Occurred(), - else a tuple of args sent to ts_target (holds a reference) - - ts_passaround_kwargs: switch kwargs (holds a reference) - On return results are passed via global variables as well: - - ts_origin: originating greenlet (holds a reference) - - ts_current: current greenlet (holds a reference) - - ts_passaround_args: NULL if PyErr_Occurred(), - else a tuple of args sent to ts_current (holds a reference) - - ts_passaround_kwargs: switch kwargs (holds a reference) - It is very important that stack switch is 'atomic', i.e. no - calls into other Python code allowed (except very few that - are safe), because global variables are very fragile. - */ int err; { /* save state */ PyGreenlet* current = ts_current; @@ -522,10 +539,23 @@ g_switchstack(void) current->exc_traceback = tstate->exc_traceback; #endif #if GREENLET_USE_CFRAME + /* + IMPORTANT: ``cframe`` is a pointer into the STACK. + Thus, because the call to ``slp_switch()`` + changes the contents of the stack, you cannot read from + ``ts_current->cframe`` after that call and necessarily + get the same values you get from reading it here. Anything + you need to restore from now to then must be saved + in a global variable (because we can't use stack variables + here either). + */ current->cframe = tstate->cframe; + ts__g_switchstack_use_tracing = tstate->cframe->use_tracing; #endif } + err = slp_switch(); + if (err < 0) { /* error */ PyGreenlet* current = ts_current; current->top_frame = NULL; @@ -569,6 +599,13 @@ g_switchstack(void) #if GREENLET_USE_CFRAME tstate->cframe = target->cframe; + /* + If we were tracing, we need to keep tracing. + There should never be the possibility of hitting the + root_cframe here. See note above about why we can't + just copy this from ``origin->cframe->use_tracing``. + */ + tstate->cframe->use_tracing = ts__g_switchstack_use_tracing; #endif assert(ts_origin == NULL); @@ -681,10 +718,8 @@ g_switch(PyGreenlet* target, PyObject* args, PyObject* kwargs) PyObject* tracefunc; origin = ts_origin; ts_origin = NULL; - current = ts_current; - if ((tracefunc = PyDict_GetItem(current->run_info, ts_tracekey)) != - NULL) { + if ((tracefunc = PyDict_GetItem(current->run_info, ts_tracekey)) != NULL) { Py_INCREF(tracefunc); if (g_calltrace(tracefunc, args ? ts_event_switch : ts_event_throw, @@ -768,7 +803,15 @@ static int GREENLET_NOINLINE(g_initialstub)(void* mark) PyGreenlet* self = ts_target; PyObject* args = ts_passaround_args; PyObject* kwargs = ts_passaround_kwargs; - +#if GREENLET_USE_CFRAME + /* + See green_new(). This is a stack-allocated variable used + while *self* is in PyObject_Call(). + We want to defer copying the state info until we're sure + we need it and are in a stable place to do so. + */ + CFrame trace_info; +#endif /* save exception in case getattr clears it */ PyErr_Fetch(&exc, &val, &tb); /* self.run is the object to call in the new greenlet */ @@ -809,6 +852,17 @@ static int GREENLET_NOINLINE(g_initialstub)(void* mark) return 1; } +#if GREENLET_USE_CFRAME + /* OK, we need it, we're about to switch greenlets, save the state. */ + trace_info = *PyThreadState_GET()->cframe; + /* Make the target greenlet refer to the stack value. */ + self->cframe = &trace_info; + /* + And restore the link to the previous frame so this one gets + unliked appropriately. + */ + self->cframe->previous = &PyThreadState_GET()->root_cframe; +#endif /* start the greenlet */ self->stack_start = NULL; self->stack_stop = (char*)mark; @@ -832,8 +886,8 @@ static int GREENLET_NOINLINE(g_initialstub)(void* mark) err = g_switchstack(); /* returns twice! - The 1st time with err=1: we are in the new greenlet - The 2nd time with err=0: back in the caller's greenlet + The 1st time with ``err == 1``: we are in the new greenlet + The 2nd time with ``err <= 0``: back in the caller's greenlet */ if (err == 1) { /* in the new greenlet */ @@ -853,8 +907,7 @@ static int GREENLET_NOINLINE(g_initialstub)(void* mark) Py_INCREF(self->run_info); Py_XDECREF(o); - if ((tracefunc = PyDict_GetItem(self->run_info, ts_tracekey)) != - NULL) { + if ((tracefunc = PyDict_GetItem(self->run_info, ts_tracekey)) != NULL) { Py_INCREF(tracefunc); if (g_calltrace(tracefunc, args ? ts_event_switch : ts_event_throw, @@ -921,6 +974,47 @@ green_new(PyTypeObject* type, PyObject* args, PyObject* kwds) Py_INCREF(ts_current); ((PyGreenlet*)o)->parent = ts_current; #if GREENLET_USE_CFRAME + /* + The PyThreadState->cframe pointer usually points to memory on the + stack, alloceted in a call into PyEval_EvalFrameDefault. + + Initially, before any evaluation begins, it points to the initial + PyThreadState object's ``root_cframe`` object, which is statically + allocated for the lifetime of the thread. + + A greenlet can last for longer than a call to + PyEval_EvalFrameDefault, so we can't set its ``cframe`` pointer to + be the current ``PyThreadState->cframe``; nor could we use one from + the greenlet parent for the same reason. Yet a further no: we can't + allocate one scoped to the greenlet and then destroy it when the + greenlet is deallocated, because inside the interpreter the CFrame + objects form a linked list, and that too can result in accessing + memory beyond its dynamic lifetime (if the greenlet doesn't actually + finish before it dies, its entry could still be in the list). + + Using the ``root_cframe`` is problematic, though, because its + members are never modified by the interpreter and are set to 0, + meaning that its ``use_tracing`` flag is never updated. We don't + want to modify that value in the ``root_cframe`` ourself: it + *shouldn't* matter much because we should probably never get back to + the point where that's the only cframe on the stack; even if it did + matter, the major consequence of an incorrect value for + ``use_tracing`` is that if its true the interpreter does some extra + work --- however, it's just good code hygiene. + + Our solution: before a greenlet runs, after its initial creation, + it uses the ``root_cframe`` just to have something to put there. + However, once the greenlet is actually switched to for the first + time, ``g_initialstub`` (which doesn't actually "return" while the + greenlet is running) stores a new CFrame on its local stack, and + copies the appropriate values from the currently running CFrame; + this is then made the CFrame for the newly-minted greenlet. + ``g_initialstub`` then proceeds to call ``glet.run()``, which + results in ``PyEval_...`` adding the CFrame to the list. Switches + continue as normal. Finally, when the greenlet finishes, the call to + ``glet.run()`` returns and the CFrame is taken out of the linked + list and the stack value is now unused and free to expire. + */ ((PyGreenlet*)o)->cframe = &PyThreadState_GET()->root_cframe; #endif } diff --git a/src/greenlet/tests/test_tracing.py b/src/greenlet/tests/test_tracing.py index 4f34b156..2ab4d715 100644 --- a/src/greenlet/tests/test_tracing.py +++ b/src/greenlet/tests/test_tracing.py @@ -1,52 +1,267 @@ +import sys import unittest -import threading import greenlet class SomeError(Exception): pass -class TracingTests(unittest.TestCase): - if greenlet.GREENLET_USE_TRACING: - def test_greenlet_tracing(self): - main = greenlet.getcurrent() - actions = [] - def trace(*args): - actions.append(args) - def dummy(): - pass - def dummyexc(): - raise SomeError() - oldtrace = greenlet.settrace(trace) - try: - g1 = greenlet.greenlet(dummy) - g1.switch() - g2 = greenlet.greenlet(dummyexc) - self.assertRaises(SomeError, g2.switch) - finally: - greenlet.settrace(oldtrace) - self.assertEqual(actions, [ - ('switch', (main, g1)), - ('switch', (g1, main)), - ('switch', (main, g2)), - ('throw', (g2, main)), - ]) - - def test_exception_disables_tracing(self): - main = greenlet.getcurrent() - actions = [] - def trace(*args): - actions.append(args) - raise SomeError() - def dummy(): - main.switch() - g = greenlet.greenlet(dummy) - g.switch() - oldtrace = greenlet.settrace(trace) - try: - self.assertRaises(SomeError, g.switch) - self.assertEqual(greenlet.gettrace(), None) - finally: - greenlet.settrace(oldtrace) - self.assertEqual(actions, [ - ('switch', (main, g)), - ]) +class GreenletTracer(object): + oldtrace = None + + def __init__(self, error_on_trace=False): + self.actions = [] + self.error_on_trace = error_on_trace + + def __call__(self, *args): + self.actions.append(args) + if self.error_on_trace: + raise SomeError + + def __enter__(self): + self.oldtrace = greenlet.settrace(self) + return self.actions + + def __exit__(self, *args): + greenlet.settrace(self.oldtrace) + + +class TestGreenletTracing(unittest.TestCase): + """ + Tests of ``greenlet.settrace()`` + """ + + def test_greenlet_tracing(self): + main = greenlet.getcurrent() + def dummy(): + pass + def dummyexc(): + raise SomeError() + + with GreenletTracer() as actions: + g1 = greenlet.greenlet(dummy) + g1.switch() + g2 = greenlet.greenlet(dummyexc) + self.assertRaises(SomeError, g2.switch) + + self.assertEqual(actions, [ + ('switch', (main, g1)), + ('switch', (g1, main)), + ('switch', (main, g2)), + ('throw', (g2, main)), + ]) + + def test_exception_disables_tracing(self): + main = greenlet.getcurrent() + def dummy(): + main.switch() + g = greenlet.greenlet(dummy) + g.switch() + with GreenletTracer(error_on_trace=True) as actions: + self.assertRaises(SomeError, g.switch) + self.assertEqual(greenlet.gettrace(), None) + + self.assertEqual(actions, [ + ('switch', (main, g)), + ]) + + +class PythonTracer(object): + oldtrace = None + + def __init__(self): + self.actions = [] + + def __call__(self, frame, event, arg): + # Record the co_name so we have an idea what function we're in. + self.actions.append((event, frame.f_code.co_name)) + + def __enter__(self): + self.oldtrace = sys.setprofile(self) + return self.actions + + def __exit__(self, *args): + sys.setprofile(self.oldtrace) + +def tpt_callback(): + return 42 + +class TestPythonTracing(unittest.TestCase): + """ + Tests of the interaction of ``sys.settrace()`` + with greenlet facilities. + + NOTE: Most of this is probably CPython specific. + """ + + maxDiff = None + + def test_trace_events_trivial(self): + with PythonTracer() as actions: + tpt_callback() + # If we use the sys.settrace instead of setprofile, we get + # this: + + # self.assertEqual(actions, [ + # ('call', 'tpt_callback'), + # ('call', '__exit__'), + # ]) + + self.assertEqual(actions, [ + ('return', '__enter__'), + ('call', 'tpt_callback'), + ('return', 'tpt_callback'), + ('call', '__exit__'), + ('c_call', '__exit__'), + ]) + + def _trace_switch(self, glet): + with PythonTracer() as actions: + glet.switch() + return actions + + def _check_trace_events_func_already_set(self, glet): + actions = self._trace_switch(glet) + self.assertEqual(actions, [ + ('return', '__enter__'), + ('c_call', '_trace_switch'), + ('call', 'run'), + ('call', 'tpt_callback'), + ('return', 'tpt_callback'), + ('return', 'run'), + ('c_return', '_trace_switch'), + ('call', '__exit__'), + ('c_call', '__exit__'), + ]) + + def test_trace_events_into_greenlet_func_already_set(self): + def run(): + return tpt_callback() + + self._check_trace_events_func_already_set(greenlet.greenlet(run)) + + def test_trace_events_into_greenlet_subclass_already_set(self): + class X(greenlet.greenlet): + def run(self): + return tpt_callback() + self._check_trace_events_func_already_set(X()) + + def _check_trace_events_from_greenlet_sets_profiler(self, g, tracer): + g.switch() + tpt_callback() + tracer.__exit__() + self.assertEqual(tracer.actions, [ + ('return', '__enter__'), + ('call', 'tpt_callback'), + ('return', 'tpt_callback'), + ('return', 'run'), + ('call', 'tpt_callback'), + ('return', 'tpt_callback'), + ('call', '__exit__'), + ('c_call', '__exit__'), + ]) + + + def test_trace_events_from_greenlet_func_sets_profiler(self): + tracer = PythonTracer() + def run(): + tracer.__enter__() + return tpt_callback() + + self._check_trace_events_from_greenlet_sets_profiler(greenlet.greenlet(run), + tracer) + + def test_trace_events_from_greenlet_subclass_sets_profiler(self): + tracer = PythonTracer() + class X(greenlet.greenlet): + def run(self): + tracer.__enter__() + return tpt_callback() + + self._check_trace_events_from_greenlet_sets_profiler(X(), tracer) + + + def test_trace_events_multiple_greenlets_switching(self): + tracer = PythonTracer() + + g1 = None + g2 = None + + def g1_run(): + tracer.__enter__() + tpt_callback() + g2.switch() + tpt_callback() + return 42 + + def g2_run(): + tpt_callback() + tracer.__exit__() + tpt_callback() + g1.switch() + + g1 = greenlet.greenlet(g1_run) + g2 = greenlet.greenlet(g2_run) + + x = g1.switch() + self.assertEqual(x, 42) + tpt_callback() # ensure not in the trace + self.assertEqual(tracer.actions, [ + ('return', '__enter__'), + ('call', 'tpt_callback'), + ('return', 'tpt_callback'), + ('c_call', 'g1_run'), + ('call', 'g2_run'), + ('call', 'tpt_callback'), + ('return', 'tpt_callback'), + ('call', '__exit__'), + ('c_call', '__exit__'), + ]) + + def test_trace_events_multiple_greenlets_switching_siblings(self): + # Like the first version, but get both greenlets running first + # as "siblings" and then establish the tracing. + tracer = PythonTracer() + + g1 = None + g2 = None + + def g1_run(): + greenlet.getcurrent().parent.switch() + tracer.__enter__() + tpt_callback() + g2.switch() + tpt_callback() + return 42 + + def g2_run(): + greenlet.getcurrent().parent.switch() + + tpt_callback() + tracer.__exit__() + tpt_callback() + g1.switch() + + g1 = greenlet.greenlet(g1_run) + g2 = greenlet.greenlet(g2_run) + + # Start g1 + g1.switch() + # And it immediately returns control to us. + # Start g2 + g2.switch() + # Which also returns. Now kick of the real part of the + # test. + x = g1.switch() + self.assertEqual(x, 42) + + tpt_callback() # ensure not in the trace + self.assertEqual(tracer.actions, [ + ('return', '__enter__'), + ('call', 'tpt_callback'), + ('return', 'tpt_callback'), + ('c_call', 'g1_run'), + ('call', 'tpt_callback'), + ('return', 'tpt_callback'), + ('call', '__exit__'), + ('c_call', '__exit__'), + ])