From edb55facae9fc3e41373e27d42f6878fe4dfbe2d Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Thu, 2 Sep 2021 13:38:40 +0200 Subject: [PATCH 01/29] feat(transport): Experimental sdk outcomes support --- sentry_sdk/client.py | 1 + sentry_sdk/envelope.py | 2 ++ sentry_sdk/transport.py | 48 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7687baa76f..73daaaa6b9 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -200,6 +200,7 @@ def _prepare_event( new_event = before_send(event, hint or {}) if new_event is None: logger.info("before send dropped event (%s)", event) + self.tansport.record_lost_event("before_send") event = new_event # type: ignore return event diff --git a/sentry_sdk/envelope.py b/sentry_sdk/envelope.py index 5645eb8a12..23fa5db360 100644 --- a/sentry_sdk/envelope.py +++ b/sentry_sdk/envelope.py @@ -236,6 +236,8 @@ def data_category(self): return "transaction" elif ty == "event": return "error" + elif ty == "sdk_outcomes": + return "sdk_outcomes" else: return "default" diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index a254b4f6ee..546cfbc453 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -4,12 +4,13 @@ import urllib3 # type: ignore import certifi import gzip +import time from datetime import datetime, timedelta from sentry_sdk.utils import Dsn, logger, capture_internal_exceptions, json_dumps from sentry_sdk.worker import BackgroundWorker -from sentry_sdk.envelope import Envelope +from sentry_sdk.envelope import Envelope, Item, PayloadRef from sentry_sdk._types import MYPY @@ -131,6 +132,8 @@ def __init__( self._auth = self.parsed_dsn.to_auth("sentry.python/%s" % VERSION) self._disabled_until = {} # type: Dict[DataCategory, datetime] self._retry = urllib3.util.Retry() + self._event_loss_counters = {} + self._last_event_loss_sent = None self._pool = self._make_pool( self.parsed_dsn, @@ -143,6 +146,13 @@ def __init__( self.hub_cls = Hub + def record_lost_event(self, reason): + """This increments a counter for event loss by reason. The counters are flushed + periodically automatically (typically once a minute). + """ + # This is not locked because we are okay with small mismeasuring. + self._event_loss_counters[reason] = self._event_loss_counters.get(reason, 0) + 1 + def _update_rate_limits(self, response): # type: (urllib3.HTTPResponse) -> None @@ -207,7 +217,24 @@ def _send_request( def on_dropped_event(self, reason): # type: (str) -> None - pass + self.record_lost_event(reason) + + def _flush_stats(self, force=False): + if not (force or self._last_event_loss_sent is None or \ + self._last_event_loss_sent < time.time() - 60): + return + outcomes = self._event_loss_counters + self._event_loss_counters = {} + + if outcomes: + self.capture_envelope(Envelope( + items=[Item(PayloadRef(json={ + "timestamp": time.time(), + "outcomes": outcomes, + }), type="sdk_outomes")], + )) + + self._last_event_loss_sent = time.time() def _check_disabled(self, category): # type: (str) -> bool @@ -254,9 +281,15 @@ def _send_envelope( # type: (...) -> None # remove all items from the envelope which are over quota - envelope.items[:] = [ - x for x in envelope.items if not self._check_disabled(x.data_category) - ] + new_items = [] + for item in envelope.items: + if self._check_disabled(item.data_category): + if item.data_category in ("transaction", "error", "default"): + self.on_dropped_event("self_rate_limits") + else: + new_items.append(item) + + envelope.items[:] = new_items if not envelope.items: return None @@ -341,6 +374,8 @@ def send_event_wrapper(): if not self._worker.submit(send_event_wrapper): self.on_dropped_event("full_queue") + self._flush_stats() + def capture_envelope( self, envelope # type: Envelope ): @@ -356,6 +391,8 @@ def send_envelope_wrapper(): if not self._worker.submit(send_envelope_wrapper): self.on_dropped_event("full_queue") + self._flush_stats() + def flush( self, timeout, # type: float @@ -363,6 +400,7 @@ def flush( ): # type: (...) -> None logger.debug("Flushing HTTP transport") + self._flush_stats(force=True) if timeout > 0: self._worker.flush(timeout, callback) From 9e45ec0405a81f92d3dcc6c8289cce1c10b74228 Mon Sep 17 00:00:00 2001 From: sentry-bot Date: Thu, 2 Sep 2021 11:39:11 +0000 Subject: [PATCH 02/29] fix: Formatting --- sentry_sdk/transport.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 546cfbc453..95f6ac94f9 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -220,19 +220,31 @@ def on_dropped_event(self, reason): self.record_lost_event(reason) def _flush_stats(self, force=False): - if not (force or self._last_event_loss_sent is None or \ - self._last_event_loss_sent < time.time() - 60): + if not ( + force + or self._last_event_loss_sent is None + or self._last_event_loss_sent < time.time() - 60 + ): return outcomes = self._event_loss_counters self._event_loss_counters = {} if outcomes: - self.capture_envelope(Envelope( - items=[Item(PayloadRef(json={ - "timestamp": time.time(), - "outcomes": outcomes, - }), type="sdk_outomes")], - )) + self.capture_envelope( + Envelope( + items=[ + Item( + PayloadRef( + json={ + "timestamp": time.time(), + "outcomes": outcomes, + } + ), + type="sdk_outomes", + ) + ], + ) + ) self._last_event_loss_sent = time.time() From 34a20448366b2bc2aa57bbbdef5f0f6ba6742186 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Fri, 3 Sep 2021 11:15:32 +0200 Subject: [PATCH 03/29] feat: use internal as data category for sdk outcomes --- .vscode/settings.json | 3 ++- sentry_sdk/_types.py | 9 ++++++++- sentry_sdk/envelope.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index c7cadb4d6c..c167a13dc2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { - "python.pythonPath": ".venv/bin/python" + "python.pythonPath": ".venv/bin/python", + "python.formatting.provider": "black" } \ No newline at end of file diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index a69896a248..7ce7e9e4f6 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -37,7 +37,14 @@ NotImplementedType = Any EventDataCategory = Literal[ - "default", "error", "crash", "transaction", "security", "attachment", "session" + "default", + "error", + "crash", + "transaction", + "security", + "attachment", + "session", + "internal", ] SessionStatus = Literal["ok", "exited", "crashed", "abnormal"] EndpointType = Literal["store", "envelope"] diff --git a/sentry_sdk/envelope.py b/sentry_sdk/envelope.py index 23fa5db360..dbc046b22f 100644 --- a/sentry_sdk/envelope.py +++ b/sentry_sdk/envelope.py @@ -237,7 +237,7 @@ def data_category(self): elif ty == "event": return "error" elif ty == "sdk_outcomes": - return "sdk_outcomes" + return "internal" else: return "default" From 71af7132e79678b253c9a18e3f993058b7d46603 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 5 Sep 2021 21:26:28 +0200 Subject: [PATCH 04/29] Update sentry_sdk/client.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kamil Ogórek --- sentry_sdk/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 73daaaa6b9..b28e19e4ae 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -200,7 +200,7 @@ def _prepare_event( new_event = before_send(event, hint or {}) if new_event is None: logger.info("before send dropped event (%s)", event) - self.tansport.record_lost_event("before_send") + self.transport.record_lost_event("before_send") event = new_event # type: ignore return event From 4ebac46163562ef39e27c9afd29a21538139496b Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 5 Sep 2021 23:07:54 +0200 Subject: [PATCH 05/29] fix: better align recording of lost events with expectations --- sentry_sdk/client.py | 3 +- sentry_sdk/envelope.py | 2 +- sentry_sdk/tracing.py | 13 +++++-- sentry_sdk/transport.py | 80 +++++++++++++++++++++++++++-------------- 4 files changed, 66 insertions(+), 32 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index b28e19e4ae..6e15292044 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -200,7 +200,6 @@ def _prepare_event( new_event = before_send(event, hint or {}) if new_event is None: logger.info("before send dropped event (%s)", event) - self.transport.record_lost_event("before_send") event = new_event # type: ignore return event @@ -244,6 +243,8 @@ def _should_capture( self.options["sample_rate"] < 1.0 and random.random() >= self.options["sample_rate"] ): + # record a lost event if we did not sample this. + self.transport.record_lost_event("sample_rate", "error") return False if self._is_ignored_error(event, hint): diff --git a/sentry_sdk/envelope.py b/sentry_sdk/envelope.py index dbc046b22f..65d8f9e56d 100644 --- a/sentry_sdk/envelope.py +++ b/sentry_sdk/envelope.py @@ -236,7 +236,7 @@ def data_category(self): return "transaction" elif ty == "event": return "error" - elif ty == "sdk_outcomes": + elif ty == "client_report": return "internal" else: return "default" diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 4ce25f27c2..ac0611be68 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -507,13 +507,20 @@ def finish(self, hub=None): # This transaction is already finished, ignore. return None + hub = hub or self.hub or sentry_sdk.Hub.current + client = hub.client + # This is a de facto proxy for checking if sampled = False if self._span_recorder is None: logger.debug("Discarding transaction because sampled = False") - return None - hub = hub or self.hub or sentry_sdk.Hub.current - client = hub.client + # This is not entirely accurate because discards here are not + # exclusively based on sample rate but also traces sampler, but + # we handle this the same here. + if client: + client.transport.record_lost_event("sample_rate", "transaction") + + return None if client is None: # We have no client and therefore nowhere to send this transaction. diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 95f6ac94f9..164a8ac540 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -93,6 +93,13 @@ def kill(self): """Forcefully kills the transport.""" pass + def record_lost_event(self, reason, data_category="default"): + """This increments a counter for event loss by reason and + data category. + """ + # type: (...) -> None + pass + def __del__(self): # type: () -> None try: @@ -132,7 +139,7 @@ def __init__( self._auth = self.parsed_dsn.to_auth("sentry.python/%s" % VERSION) self._disabled_until = {} # type: Dict[DataCategory, datetime] self._retry = urllib3.util.Retry() - self._event_loss_counters = {} + self._discarded_events = {} self._last_event_loss_sent = None self._pool = self._make_pool( @@ -146,12 +153,10 @@ def __init__( self.hub_cls = Hub - def record_lost_event(self, reason): - """This increments a counter for event loss by reason. The counters are flushed - periodically automatically (typically once a minute). - """ + def record_lost_event(self, reason, data_category="default"): # This is not locked because we are okay with small mismeasuring. - self._event_loss_counters[reason] = self._event_loss_counters.get(reason, 0) + 1 + key = (data_category, reason) + self._discarded_events[key] = self._discarded_events.get(key, 0) + 1 def _update_rate_limits(self, response): # type: (urllib3.HTTPResponse) -> None @@ -177,7 +182,15 @@ def _send_request( body, # type: bytes headers, # type: Dict[str, str] endpoint_type="store", # type: EndpointType + envelope=None, # type: Optional[Envelope] ): + def record_loss(reason): + if envelope is None: + self.record_lost_event(reason, "error") + else: + for item in envelope.items: + self.record_lost_event(reason, item.data_category) + # type: (...) -> None headers.update( { @@ -194,6 +207,7 @@ def _send_request( ) except Exception: self.on_dropped_event("network") + record_loss("network_error") raise try: @@ -201,7 +215,9 @@ def _send_request( if response.status == 429: # if we hit a 429. Something was rate limited but we already - # acted on this in `self._update_rate_limits`. + # acted on this in `self._update_rate_limits`. Note that we + # do not want to record event loss here as we will have recorded + # an outcome in relay already. self.on_dropped_event("status_429") pass @@ -212,42 +228,45 @@ def _send_request( response.data, ) self.on_dropped_event("status_{}".format(response.status)) + record_loss("network_error") finally: response.close() def on_dropped_event(self, reason): # type: (str) -> None - self.record_lost_event(reason) + return None def _flush_stats(self, force=False): + discarded_events = self._discarded_events + if not ( force + or not discarded_events or self._last_event_loss_sent is None or self._last_event_loss_sent < time.time() - 60 ): return - outcomes = self._event_loss_counters - self._event_loss_counters = {} - - if outcomes: - self.capture_envelope( - Envelope( - items=[ - Item( - PayloadRef( - json={ - "timestamp": time.time(), - "outcomes": outcomes, - } - ), - type="sdk_outomes", - ) - ], - ) - ) + self._discarded_events = {} self._last_event_loss_sent = time.time() + client_report = Item( + PayloadRef( + json={ + "timestamp": time.time(), + "discarded_events": [ + (data_category, reason, quantity) + for ( + (data_category, reason), + quantity, + ) in discarded_events.items() + ], + } + ), + type="client_report", + ) + self.capture_envelope(Envelope(items=[client_report])) + def _check_disabled(self, category): # type: (str) -> bool def _disabled(bucket): @@ -264,6 +283,7 @@ def _send_event( if self._check_disabled("error"): self.on_dropped_event("self_rate_limits") + self.record_lost_event("ratelimit_backoff", "error") return None body = io.BytesIO() @@ -298,6 +318,7 @@ def _send_envelope( if self._check_disabled(item.data_category): if item.data_category in ("transaction", "error", "default"): self.on_dropped_event("self_rate_limits") + self.record_lost_event("ratelimit_backoff", item.data_category) else: new_items.append(item) @@ -316,6 +337,7 @@ def _send_envelope( self.parsed_dsn.project_id, self.parsed_dsn.host, ) + self._send_request( body.getvalue(), headers={ @@ -323,6 +345,7 @@ def _send_envelope( "Content-Encoding": "gzip", }, endpoint_type="envelope", + envelope=envelope, ) return None @@ -385,6 +408,7 @@ def send_event_wrapper(): if not self._worker.submit(send_event_wrapper): self.on_dropped_event("full_queue") + self.record_lost_event("queue_overflow", "error") self._flush_stats() @@ -402,6 +426,8 @@ def send_envelope_wrapper(): if not self._worker.submit(send_envelope_wrapper): self.on_dropped_event("full_queue") + for item in envelope.items: + self.record_lost_event("queue_overflow", item.data_category) self._flush_stats() From 8022c6f83ef7201a3f3e7a3f8a3e434d754e7ad1 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 5 Sep 2021 23:24:07 +0200 Subject: [PATCH 06/29] fix: typing --- sentry_sdk/client.py | 3 ++- sentry_sdk/tracing.py | 2 +- sentry_sdk/transport.py | 36 +++++++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 6e15292044..ab232da13f 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -244,7 +244,8 @@ def _should_capture( and random.random() >= self.options["sample_rate"] ): # record a lost event if we did not sample this. - self.transport.record_lost_event("sample_rate", "error") + if self.transport: + self.transport.record_lost_event("sample_rate", "error") return False if self._is_ignored_error(event, hint): diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index ac0611be68..3cf18f3ab5 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -517,7 +517,7 @@ def finish(self, hub=None): # This is not entirely accurate because discards here are not # exclusively based on sample rate but also traces sampler, but # we handle this the same here. - if client: + if client and client.transport: client.transport.record_lost_event("sample_rate", "transaction") return None diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 164a8ac540..c3a8f1c0d5 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -7,6 +7,7 @@ import time from datetime import datetime, timedelta +from collections import defaultdict from sentry_sdk.utils import Dsn, logger, capture_internal_exceptions, json_dumps from sentry_sdk.worker import BackgroundWorker @@ -23,6 +24,7 @@ from typing import Tuple from typing import Type from typing import Union + from typing import DefaultDict from urllib3.poolmanager import PoolManager # type: ignore from urllib3.poolmanager import ProxyManager @@ -93,12 +95,16 @@ def kill(self): """Forcefully kills the transport.""" pass - def record_lost_event(self, reason, data_category="default"): + def record_lost_event( + self, + reason, # type: str + data_category="default", # type: str + ): + # type: (...) -> None """This increments a counter for event loss by reason and data category. """ - # type: (...) -> None - pass + return None def __del__(self): # type: () -> None @@ -139,8 +145,10 @@ def __init__( self._auth = self.parsed_dsn.to_auth("sentry.python/%s" % VERSION) self._disabled_until = {} # type: Dict[DataCategory, datetime] self._retry = urllib3.util.Retry() - self._discarded_events = {} - self._last_event_loss_sent = None + self._discarded_events = defaultdict( + int + ) # type: DefaultDict[Tuple[str, str], int] + self._last_event_loss_sent = None # type: Optional[float] self._pool = self._make_pool( self.parsed_dsn, @@ -153,10 +161,13 @@ def __init__( self.hub_cls = Hub - def record_lost_event(self, reason, data_category="default"): - # This is not locked because we are okay with small mismeasuring. - key = (data_category, reason) - self._discarded_events[key] = self._discarded_events.get(key, 0) + 1 + def record_lost_event( + self, + reason, # type: str + data_category="default", # type: str + ): + # type: (...) -> None + self._discarded_events[data_category, reason] += 1 def _update_rate_limits(self, response): # type: (urllib3.HTTPResponse) -> None @@ -184,14 +195,16 @@ def _send_request( endpoint_type="store", # type: EndpointType envelope=None, # type: Optional[Envelope] ): + # type: (...) -> None + def record_loss(reason): + # type: (str) -> None if envelope is None: self.record_lost_event(reason, "error") else: for item in envelope.items: self.record_lost_event(reason, item.data_category) - # type: (...) -> None headers.update( { "User-Agent": str(self._auth.client), @@ -237,6 +250,7 @@ def on_dropped_event(self, reason): return None def _flush_stats(self, force=False): + # type: (bool) -> None discarded_events = self._discarded_events if not ( @@ -247,7 +261,7 @@ def _flush_stats(self, force=False): ): return - self._discarded_events = {} + self._discarded_events = defaultdict(int) self._last_event_loss_sent = time.time() client_report = Item( From 834e78ade56fa167268367594024520da9621dc0 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 5 Sep 2021 23:30:24 +0200 Subject: [PATCH 07/29] fix: condition --- sentry_sdk/transport.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index c3a8f1c0d5..be328af8fc 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -255,10 +255,9 @@ def _flush_stats(self, force=False): if not ( force - or not discarded_events or self._last_event_loss_sent is None or self._last_event_loss_sent < time.time() - 60 - ): + ) or not discarded_events: return self._discarded_events = defaultdict(int) From 3c587e162b7ff5f13b72a2b7e83d50eb888a68a3 Mon Sep 17 00:00:00 2001 From: sentry-bot Date: Sun, 5 Sep 2021 21:30:48 +0000 Subject: [PATCH 08/29] fix: Formatting --- sentry_sdk/transport.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index be328af8fc..24e4ac820a 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -253,11 +253,14 @@ def _flush_stats(self, force=False): # type: (bool) -> None discarded_events = self._discarded_events - if not ( - force - or self._last_event_loss_sent is None - or self._last_event_loss_sent < time.time() - 60 - ) or not discarded_events: + if ( + not ( + force + or self._last_event_loss_sent is None + or self._last_event_loss_sent < time.time() - 60 + ) + or not discarded_events + ): return self._discarded_events = defaultdict(int) From 51b3b8ccfbad25a1801fd5855ebe990b22b54644 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 5 Sep 2021 23:51:52 +0200 Subject: [PATCH 09/29] fix: reformat --- scripts/init_serverless_sdk.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/init_serverless_sdk.py b/scripts/init_serverless_sdk.py index 878ff6029e..7a414ff406 100644 --- a/scripts/init_serverless_sdk.py +++ b/scripts/init_serverless_sdk.py @@ -51,16 +51,23 @@ def extract_and_load_lambda_function_module(self, module_path): # Supported python versions are 2.7, 3.6, 3.7, 3.8 if py_version >= (3, 5): import importlib.util - spec = importlib.util.spec_from_file_location(module_name, module_file_path) + + spec = importlib.util.spec_from_file_location( + module_name, module_file_path + ) self.lambda_function_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(self.lambda_function_module) elif py_version[0] < 3: import imp - self.lambda_function_module = imp.load_source(module_name, module_file_path) + + self.lambda_function_module = imp.load_source( + module_name, module_file_path + ) else: raise ValueError("Python version %s is not supported." % py_version) else: import importlib + self.lambda_function_module = importlib.import_module(module_path) def get_lambda_handler(self): From a6cc9718fe398acee134e6ee9297e0fddea9b359 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 6 Sep 2021 00:36:32 +0200 Subject: [PATCH 10/29] fix: tests and added send_client_reports option --- sentry_sdk/consts.py | 1 + sentry_sdk/transport.py | 8 ++++++-- tests/test_transport.py | 18 ++++++++++++++++-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index a9822e8223..5370fec7b2 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -75,6 +75,7 @@ def __init__( traces_sampler=None, # type: Optional[TracesSampler] auto_enabling_integrations=True, # type: bool auto_session_tracking=True, # type: bool + send_client_reports=True, # type: bool _experiments={}, # type: Experiments # noqa: B006 ): # type: (...) -> None diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 24e4ac820a..8facfca7ba 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -167,7 +167,8 @@ def record_lost_event( data_category="default", # type: str ): # type: (...) -> None - self._discarded_events[data_category, reason] += 1 + if self.options["send_client_reports"]: + self._discarded_events[data_category, reason] += 1 def _update_rate_limits(self, response): # type: (urllib3.HTTPResponse) -> None @@ -251,6 +252,8 @@ def on_dropped_event(self, reason): def _flush_stats(self, force=False): # type: (bool) -> None + if not self.options["send_client_reports"]: + return discarded_events = self._discarded_events if ( @@ -454,8 +457,9 @@ def flush( ): # type: (...) -> None logger.debug("Flushing HTTP transport") - self._flush_stats(force=True) + if timeout > 0: + self._worker.submit(lambda: self._flush_stats(force=True)) self._worker.flush(timeout, callback) def kill(self): diff --git a/tests/test_transport.py b/tests/test_transport.py index 96145eb951..b55627ffd7 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -130,8 +130,16 @@ def test_simple_rate_limits(httpserver, capsys, caplog, make_client): @pytest.mark.parametrize("response_code", [200, 429]) -def test_data_category_limits(httpserver, capsys, caplog, response_code, make_client): - client = make_client() +def test_data_category_limits( + httpserver, capsys, caplog, response_code, make_client, monkeypatch +): + client = make_client(send_client_reports=False) + + captured_outcomes = [] + monkeypatch.setattr( + client.transport, "record_lost_event", lambda *x: captured_outcomes.append(x) + ) + httpserver.serve_content( "hm", response_code, @@ -157,6 +165,12 @@ def test_data_category_limits(httpserver, capsys, caplog, response_code, make_cl client.flush() assert len(httpserver.requests) == 1 + assert httpserver.requests[0].url.endswith("/api/132/store/") + + assert captured_outcomes == [ + ("ratelimit_backoff", "transaction"), + ("ratelimit_backoff", "transaction"), + ] @pytest.mark.parametrize("response_code", [200, 429]) From 049bc5ee6181e6de1336ff544755882de55ba77d Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 6 Sep 2021 00:46:52 +0200 Subject: [PATCH 11/29] fix: make lint happy --- sentry_sdk/transport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 8facfca7ba..860c3d6e96 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -140,7 +140,7 @@ def __init__( Transport.__init__(self, options) assert self.parsed_dsn is not None - self.options = options + self.options = options # type: Dict[str, Any] self._worker = BackgroundWorker(queue_size=options["transport_queue_size"]) self._auth = self.parsed_dsn.to_auth("sentry.python/%s" % VERSION) self._disabled_until = {} # type: Dict[DataCategory, datetime] From ca94e19f26e47a420cfaf8e6f5b823dbb9074216 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 6 Sep 2021 19:52:32 +0200 Subject: [PATCH 12/29] ref: change protocol to latest revision --- sentry_sdk/transport.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 860c3d6e96..fb560fc083 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -274,9 +274,9 @@ def _flush_stats(self, force=False): json={ "timestamp": time.time(), "discarded_events": [ - (data_category, reason, quantity) + {"reason": reason, "category": category, "quantity": quantity} for ( - (data_category, reason), + (category, reason), quantity, ) in discarded_events.items() ], From 1ccaf3032b3fb1e81b5e2afb39d3dd485b2a3962 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 6 Sep 2021 22:26:10 +0200 Subject: [PATCH 13/29] ref: attach pending client reports to the most recent envelope --- sentry_sdk/transport.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index fb560fc083..d605047ff2 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -250,7 +250,7 @@ def on_dropped_event(self, reason): # type: (str) -> None return None - def _flush_stats(self, force=False): + def _fetch_pending_client_report(self, force=False): # type: (bool) -> None if not self.options["send_client_reports"]: return @@ -269,7 +269,7 @@ def _flush_stats(self, force=False): self._discarded_events = defaultdict(int) self._last_event_loss_sent = time.time() - client_report = Item( + return Item( PayloadRef( json={ "timestamp": time.time(), @@ -284,7 +284,12 @@ def _flush_stats(self, force=False): ), type="client_report", ) - self.capture_envelope(Envelope(items=[client_report])) + + def _flush_client_reports(self, force=False): + # type: (bool) -> None + client_report = self._fetch_pending_client_report(force=force) + if client_report is not None: + self.capture_envelope(Envelope(items=[client_report])) def _check_disabled(self, category): # type: (str) -> bool @@ -345,6 +350,15 @@ def _send_envelope( if not envelope.items: return None + # since we're already in the business of sending out an envelope here + # check if we have one pending for the stats session envelopes so we + # can attach it to this enveloped scheduled for sending. This will + # currently typically attach the client report to the most recent + # session update. + client_report_item = self._fetch_pending_client_report() + if client_report_item is not None: + envelope.items.append(client_report_item) + body = io.BytesIO() with gzip.GzipFile(fileobj=body, mode="w") as f: envelope.serialize_into(f) @@ -429,7 +443,7 @@ def send_event_wrapper(): self.on_dropped_event("full_queue") self.record_lost_event("queue_overflow", "error") - self._flush_stats() + self._flush_client_reports() def capture_envelope( self, envelope # type: Envelope @@ -448,7 +462,7 @@ def send_envelope_wrapper(): for item in envelope.items: self.record_lost_event("queue_overflow", item.data_category) - self._flush_stats() + self._flush_client_reports() def flush( self, @@ -459,7 +473,7 @@ def flush( logger.debug("Flushing HTTP transport") if timeout > 0: - self._worker.submit(lambda: self._flush_stats(force=True)) + self._worker.submit(lambda: self._flush_client_reports(force=True)) self._worker.flush(timeout, callback) def kill(self): From add2c4c610cc2c3193dfc2d5d417c554e1a94e06 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 6 Sep 2021 22:49:43 +0200 Subject: [PATCH 14/29] ref: correctly count bytes for attachments --- sentry_sdk/client.py | 2 +- sentry_sdk/tracing.py | 4 +++- sentry_sdk/transport.py | 38 +++++++++++++++++++++++++------------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index ab232da13f..05ea4dec99 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -245,7 +245,7 @@ def _should_capture( ): # record a lost event if we did not sample this. if self.transport: - self.transport.record_lost_event("sample_rate", "error") + self.transport.record_lost_event("sample_rate", data_category="error") return False if self._is_ignored_error(event, hint): diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 3cf18f3ab5..749ab63b5b 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -518,7 +518,9 @@ def finish(self, hub=None): # exclusively based on sample rate but also traces sampler, but # we handle this the same here. if client and client.transport: - client.transport.record_lost_event("sample_rate", "transaction") + client.transport.record_lost_event( + "sample_rate", data_category="transaction" + ) return None diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index d605047ff2..6f8bb8c675 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -98,7 +98,8 @@ def kill(self): def record_lost_event( self, reason, # type: str - data_category="default", # type: str + data_category=None, # type: Optional[str] + item=None, # type: Optional[Item] ): # type: (...) -> None """This increments a counter for event loss by reason and @@ -164,11 +165,22 @@ def __init__( def record_lost_event( self, reason, # type: str - data_category="default", # type: str + data_category=None, # type: Optional[str] + item=None, # type: Optional[Item] ): # type: (...) -> None - if self.options["send_client_reports"]: - self._discarded_events[data_category, reason] += 1 + if not self.options["send_client_reports"]: + return + + quantity = 1 + if item is not None: + data_category = item.data_category + if data_category == "attachment": + quantity = len(item.get_bytes()) + elif data_category is None: + raise TypeError("data category not provided") + + self._discarded_events[data_category, reason] += quantity def _update_rate_limits(self, response): # type: (urllib3.HTTPResponse) -> None @@ -201,10 +213,10 @@ def _send_request( def record_loss(reason): # type: (str) -> None if envelope is None: - self.record_lost_event(reason, "error") + self.record_lost_event(reason, data_category="error") else: for item in envelope.items: - self.record_lost_event(reason, item.data_category) + self.record_lost_event(reason, item=item) headers.update( { @@ -251,9 +263,9 @@ def on_dropped_event(self, reason): return None def _fetch_pending_client_report(self, force=False): - # type: (bool) -> None + # type: (bool) -> Optional[Item] if not self.options["send_client_reports"]: - return + return None discarded_events = self._discarded_events if ( @@ -264,7 +276,7 @@ def _fetch_pending_client_report(self, force=False): ) or not discarded_events ): - return + return None self._discarded_events = defaultdict(int) self._last_event_loss_sent = time.time() @@ -307,7 +319,7 @@ def _send_event( if self._check_disabled("error"): self.on_dropped_event("self_rate_limits") - self.record_lost_event("ratelimit_backoff", "error") + self.record_lost_event("ratelimit_backoff", data_category="error") return None body = io.BytesIO() @@ -342,7 +354,7 @@ def _send_envelope( if self._check_disabled(item.data_category): if item.data_category in ("transaction", "error", "default"): self.on_dropped_event("self_rate_limits") - self.record_lost_event("ratelimit_backoff", item.data_category) + self.record_lost_event("ratelimit_backoff", item=item) else: new_items.append(item) @@ -441,7 +453,7 @@ def send_event_wrapper(): if not self._worker.submit(send_event_wrapper): self.on_dropped_event("full_queue") - self.record_lost_event("queue_overflow", "error") + self.record_lost_event("queue_overflow", data_category="error") self._flush_client_reports() @@ -460,7 +472,7 @@ def send_envelope_wrapper(): if not self._worker.submit(send_envelope_wrapper): self.on_dropped_event("full_queue") for item in envelope.items: - self.record_lost_event("queue_overflow", item.data_category) + self.record_lost_event("queue_overflow", item=item) self._flush_client_reports() From 38ea2e111f1f964da9ce463d3cf874d2d299d4d7 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 6 Sep 2021 22:52:30 +0200 Subject: [PATCH 15/29] fix: test with changed lost event recording --- tests/test_transport.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index b55627ffd7..a6445236c9 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -136,9 +136,13 @@ def test_data_category_limits( client = make_client(send_client_reports=False) captured_outcomes = [] - monkeypatch.setattr( - client.transport, "record_lost_event", lambda *x: captured_outcomes.append(x) - ) + + def record_lost_event(reason, data_category=None, item=None): + if data_category is None: + data_category = item.data_category + return captured_outcomes.append((reason, data_category)) + + monkeypatch.setattr(client.transport, "record_lost_event", record_lost_event) httpserver.serve_content( "hm", From 5c62e8171f73e1a8540de3b158b40b4e62befc52 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 00:27:58 +0200 Subject: [PATCH 16/29] fix: add test for client reports and fix periodic flushing --- sentry_sdk/transport.py | 26 +++++------- tests/test_transport.py | 87 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 17 deletions(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 6f8bb8c675..0b2822d401 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -149,7 +149,7 @@ def __init__( self._discarded_events = defaultdict( int ) # type: DefaultDict[Tuple[str, str], int] - self._last_event_loss_sent = None # type: Optional[float] + self._last_client_report_sent = time.time() self._pool = self._make_pool( self.parsed_dsn, @@ -266,20 +266,16 @@ def _fetch_pending_client_report(self, force=False): # type: (bool) -> Optional[Item] if not self.options["send_client_reports"]: return None - discarded_events = self._discarded_events - if ( - not ( - force - or self._last_event_loss_sent is None - or self._last_event_loss_sent < time.time() - 60 - ) - or not discarded_events - ): + if not (force or self._last_client_report_sent < time.time() - 60): return None + discarded_events = self._discarded_events self._discarded_events = defaultdict(int) - self._last_event_loss_sent = time.time() + self._last_client_report_sent = time.time() + + if not discarded_events: + return None return Item( PayloadRef( @@ -354,7 +350,7 @@ def _send_envelope( if self._check_disabled(item.data_category): if item.data_category in ("transaction", "error", "default"): self.on_dropped_event("self_rate_limits") - self.record_lost_event("ratelimit_backoff", item=item) + self.record_lost_event("ratelimit_backoff", item=item) else: new_items.append(item) @@ -450,13 +446,12 @@ def send_event_wrapper(): with hub: with capture_internal_exceptions(): self._send_event(event) + self._flush_client_reports() if not self._worker.submit(send_event_wrapper): self.on_dropped_event("full_queue") self.record_lost_event("queue_overflow", data_category="error") - self._flush_client_reports() - def capture_envelope( self, envelope # type: Envelope ): @@ -468,14 +463,13 @@ def send_envelope_wrapper(): with hub: with capture_internal_exceptions(): self._send_envelope(envelope) + self._flush_client_reports() if not self._worker.submit(send_envelope_wrapper): self.on_dropped_event("full_queue") for item in envelope.items: self.record_lost_event("queue_overflow", item=item) - self._flush_client_reports() - def flush( self, timeout, # type: float diff --git a/tests/test_transport.py b/tests/test_transport.py index a6445236c9..424bed0de9 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -1,16 +1,29 @@ # coding: utf-8 +import time import logging import pickle +import json +import gzip +import io from datetime import datetime, timedelta import pytest -from sentry_sdk import Hub, Client, add_breadcrumb, capture_message +from sentry_sdk import Hub, Client, add_breadcrumb, capture_message, Scope from sentry_sdk.transport import _parse_rate_limits +from sentry_sdk.envelope import Envelope from sentry_sdk.integrations.logging import LoggingIntegration +def get_event(request): + return json.load(gzip.GzipFile(fileobj=io.BytesIO(request.data))) + + +def get_envelope(request): + return Envelope.deserialize_from(gzip.GzipFile(fileobj=io.BytesIO(request.data))) + + @pytest.fixture def make_client(request, httpserver): def inner(**kwargs): @@ -177,6 +190,78 @@ def record_lost_event(reason, data_category=None, item=None): ] +@pytest.mark.parametrize("response_code", [200, 429]) +def test_data_category_limits_reporting( + httpserver, capsys, caplog, response_code, make_client, monkeypatch +): + client = make_client(send_client_reports=True) + + httpserver.serve_content( + "hm", + response_code, + headers={ + "X-Sentry-Rate-Limits": "4711:transaction:organization, 4711:attachment:organization" + }, + ) + + client.capture_event({"type": "transaction"}) + client.flush() + + assert len(httpserver.requests) == 1 + assert httpserver.requests[0].url.endswith("/api/132/envelope/") + del httpserver.requests[:] + + assert set(client.transport._disabled_until) == set(["attachment", "transaction"]) + + client.capture_event({"type": "transaction"}) + client.capture_event({"type": "transaction"}) + del httpserver.requests[:] + + # now trick the transport to force flush sending out the stats with the + # next envelope + time.sleep(0.2) + client.transport._last_client_report_sent = 0 + + scope = Scope() + scope.add_attachment(bytes=b"Hello World", filename="hello.txt") + + client.capture_event({"type": "error"}, scope=scope) + client.flush() + + # this goes out with an extra envelope because it's flushed after the last item + # that is normally in the queue. This is quite funny in a way beacuse it means + # that the envelope that caused its own over quota report (an error with an + # attachment) will include its outcome since it's pending. + assert len(httpserver.requests) == 1 + envelope = get_envelope(httpserver.requests[0]) + assert envelope.items[0].type == "event" + assert envelope.items[1].type == "client_report" + report = json.loads(envelope.items[1].get_bytes()) + assert report["discarded_events"] == [ + {"category": "transaction", "reason": "ratelimit_backoff", "quantity": 2}, + {"category": "attachment", "reason": "ratelimit_backoff", "quantity": 11}, + ] + del httpserver.requests[:] + + # here we sent a normal event + client.capture_event({"type": "transaction"}) + client.capture_event({"type": "error", "release": "foo"}) + client.flush() + + assert len(httpserver.requests) == 2 + + event = get_event(httpserver.requests[0]) + assert event["type"] == "error" + assert event["release"] == "foo" + + envelope = get_envelope(httpserver.requests[1]) + assert envelope.items[0].type == "client_report" + report = json.loads(envelope.items[0].get_bytes()) + assert report["discarded_events"] == [ + {"category": "transaction", "reason": "ratelimit_backoff", "quantity": 1}, + ] + + @pytest.mark.parametrize("response_code", [200, 429]) def test_complex_limits_without_data_category( httpserver, capsys, caplog, response_code, make_client From da373e44bd27869a07c4b44774e138a359f7a170 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 00:30:53 +0200 Subject: [PATCH 17/29] fix: try to attach to an envelope every 30, flush separate every 60 --- sentry_sdk/transport.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 0b2822d401..d843f0267f 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -262,12 +262,12 @@ def on_dropped_event(self, reason): # type: (str) -> None return None - def _fetch_pending_client_report(self, force=False): - # type: (bool) -> Optional[Item] + def _fetch_pending_client_report(self, force=False, interval=60): + # type: (bool, int) -> Optional[Item] if not self.options["send_client_reports"]: return None - if not (force or self._last_client_report_sent < time.time() - 60): + if not (force or self._last_client_report_sent < time.time() - interval): return None discarded_events = self._discarded_events @@ -295,7 +295,7 @@ def _fetch_pending_client_report(self, force=False): def _flush_client_reports(self, force=False): # type: (bool) -> None - client_report = self._fetch_pending_client_report(force=force) + client_report = self._fetch_pending_client_report(force=force, interval=60) if client_report is not None: self.capture_envelope(Envelope(items=[client_report])) @@ -363,7 +363,7 @@ def _send_envelope( # can attach it to this enveloped scheduled for sending. This will # currently typically attach the client report to the most recent # session update. - client_report_item = self._fetch_pending_client_report() + client_report_item = self._fetch_pending_client_report(interval=30) if client_report_item is not None: envelope.items.append(client_report_item) From e57691a5cb2bc1db58babd557c23d22c7cbbdcc1 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 10:49:43 +0200 Subject: [PATCH 18/29] ref: switch to a custom wsgi server to better capture envelopes --- tests/test_transport.py | 144 ++++++++++++++++++++++++++-------------- 1 file changed, 93 insertions(+), 51 deletions(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index 424bed0de9..a2860c1a1c 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -9,6 +9,9 @@ from datetime import datetime, timedelta import pytest +from werkzeug.wrappers import Request, Response + +from pytest_localserver.http import WSGIServer from sentry_sdk import Hub, Client, add_breadcrumb, capture_message, Scope from sentry_sdk.transport import _parse_rate_limits @@ -16,19 +19,64 @@ from sentry_sdk.integrations.logging import LoggingIntegration -def get_event(request): - return json.load(gzip.GzipFile(fileobj=io.BytesIO(request.data))) +class CapturedData(object): + def __init__(self, path, event=None, envelope=None): + self.path = path + self.event = event + self.envelope = envelope + + +class CapturingServer(WSGIServer): + def __init__(self, host="127.0.0.1", port=0, ssl_context=None): + WSGIServer.__init__(self, host, port, self, ssl_context=ssl_context) + self.code = 204 + self.headers = {} + self.captured = [] + + def respond_with(self, code=200, headers=None): + self.code = code + if headers: + self.headers = headers + + def clear_captured(self): + del self.captured[:] + + def __call__(self, environ, start_response): + """ + This is the WSGI application. + """ + request = Request(environ) + event = envelope = None + if request.mimetype == "application/json": + event = json.load(gzip.GzipFile(fileobj=io.BytesIO(request.data))) + else: + envelope = Envelope.deserialize_from( + gzip.GzipFile(fileobj=io.BytesIO(request.data)) + ) + + self.captured.append( + CapturedData(path=request.path, event=event, envelope=envelope) + ) + response = Response(status=self.code) + response.headers.extend(self.headers) + return response(environ, start_response) -def get_envelope(request): - return Envelope.deserialize_from(gzip.GzipFile(fileobj=io.BytesIO(request.data))) + +@pytest.fixture +def capturing_server(request): + server = CapturingServer() + server.start() + request.addfinalizer(server.stop) + return server @pytest.fixture -def make_client(request, httpserver): +def make_client(request, capturing_server): def inner(**kwargs): return Client( - "http://foobar@{}/132".format(httpserver.url[len("http://") :]), **kwargs + "http://foobar@{}/132".format(capturing_server.url[len("http://") :]), + **kwargs ) return inner @@ -39,7 +87,7 @@ def inner(**kwargs): @pytest.mark.parametrize("client_flush_method", ["close", "flush"]) @pytest.mark.parametrize("use_pickle", (True, False)) def test_transport_works( - httpserver, + capturing_server, request, capsys, caplog, @@ -49,7 +97,6 @@ def test_transport_works( use_pickle, maybe_monkeypatched_threading, ): - httpserver.serve_content("ok", 200) caplog.set_level(logging.DEBUG) client = make_client(debug=debug) @@ -66,14 +113,12 @@ def test_transport_works( out, err = capsys.readouterr() assert not err and not out - assert httpserver.requests + assert capturing_server.captured assert any("Sending event" in record.msg for record in caplog.records) == debug -def test_transport_infinite_loop(httpserver, request, make_client): - httpserver.serve_content("ok", 200) - +def test_transport_infinite_loop(capturing_server, request, make_client): client = make_client( debug=True, # Make sure we cannot create events from our own logging @@ -84,7 +129,7 @@ def test_transport_infinite_loop(httpserver, request, make_client): capture_message("hi") client.flush() - assert len(httpserver.requests) == 1 + assert len(capturing_server.captured) == 1 NOW = datetime(2014, 6, 2) @@ -122,16 +167,16 @@ def test_parse_rate_limits(input, expected): assert dict(_parse_rate_limits(input, now=NOW)) == expected -def test_simple_rate_limits(httpserver, capsys, caplog, make_client): +def test_simple_rate_limits(capturing_server, capsys, caplog, make_client): client = make_client() - httpserver.serve_content("no", 429, headers={"Retry-After": "4"}) + capturing_server.respond_with(code=429, headers={"Retry-After": "4"}) client.capture_event({"type": "transaction"}) client.flush() - assert len(httpserver.requests) == 1 - assert httpserver.requests[0].url.endswith("/api/132/envelope/") - del httpserver.requests[:] + assert len(capturing_server.captured) == 1 + assert capturing_server.captured[0].path == "/api/132/envelope/" + capturing_server.clear_captured() assert set(client.transport._disabled_until) == set([None]) @@ -139,12 +184,12 @@ def test_simple_rate_limits(httpserver, capsys, caplog, make_client): client.capture_event({"type": "event"}) client.flush() - assert not httpserver.requests + assert not capturing_server.captured @pytest.mark.parametrize("response_code", [200, 429]) def test_data_category_limits( - httpserver, capsys, caplog, response_code, make_client, monkeypatch + capturing_server, capsys, caplog, response_code, make_client, monkeypatch ): client = make_client(send_client_reports=False) @@ -157,18 +202,17 @@ def record_lost_event(reason, data_category=None, item=None): monkeypatch.setattr(client.transport, "record_lost_event", record_lost_event) - httpserver.serve_content( - "hm", - response_code, + capturing_server.respond_with( + code=response_code, headers={"X-Sentry-Rate-Limits": "4711:transaction:organization"}, ) client.capture_event({"type": "transaction"}) client.flush() - assert len(httpserver.requests) == 1 - assert httpserver.requests[0].url.endswith("/api/132/envelope/") - del httpserver.requests[:] + assert len(capturing_server.captured) == 1 + assert capturing_server.captured[0].path == "/api/132/envelope/" + capturing_server.clear_captured() assert set(client.transport._disabled_until) == set(["transaction"]) @@ -176,13 +220,13 @@ def record_lost_event(reason, data_category=None, item=None): client.capture_event({"type": "transaction"}) client.flush() - assert not httpserver.requests + assert not capturing_server.captured client.capture_event({"type": "event"}) client.flush() - assert len(httpserver.requests) == 1 - assert httpserver.requests[0].url.endswith("/api/132/store/") + assert len(capturing_server.captured) == 1 + assert capturing_server.captured[0].path == "/api/132/store/" assert captured_outcomes == [ ("ratelimit_backoff", "transaction"), @@ -192,13 +236,12 @@ def record_lost_event(reason, data_category=None, item=None): @pytest.mark.parametrize("response_code", [200, 429]) def test_data_category_limits_reporting( - httpserver, capsys, caplog, response_code, make_client, monkeypatch + capturing_server, capsys, caplog, response_code, make_client, monkeypatch ): client = make_client(send_client_reports=True) - httpserver.serve_content( - "hm", - response_code, + capturing_server.respond_with( + code=response_code, headers={ "X-Sentry-Rate-Limits": "4711:transaction:organization, 4711:attachment:organization" }, @@ -207,15 +250,15 @@ def test_data_category_limits_reporting( client.capture_event({"type": "transaction"}) client.flush() - assert len(httpserver.requests) == 1 - assert httpserver.requests[0].url.endswith("/api/132/envelope/") - del httpserver.requests[:] + assert len(capturing_server.captured) == 1 + assert capturing_server.captured[0].path == "/api/132/envelope/" + capturing_server.clear_captured() assert set(client.transport._disabled_until) == set(["attachment", "transaction"]) client.capture_event({"type": "transaction"}) client.capture_event({"type": "transaction"}) - del httpserver.requests[:] + capturing_server.clear_captured() # now trick the transport to force flush sending out the stats with the # next envelope @@ -232,8 +275,8 @@ def test_data_category_limits_reporting( # that is normally in the queue. This is quite funny in a way beacuse it means # that the envelope that caused its own over quota report (an error with an # attachment) will include its outcome since it's pending. - assert len(httpserver.requests) == 1 - envelope = get_envelope(httpserver.requests[0]) + assert len(capturing_server.captured) == 1 + envelope = capturing_server.captured[0].envelope assert envelope.items[0].type == "event" assert envelope.items[1].type == "client_report" report = json.loads(envelope.items[1].get_bytes()) @@ -241,20 +284,20 @@ def test_data_category_limits_reporting( {"category": "transaction", "reason": "ratelimit_backoff", "quantity": 2}, {"category": "attachment", "reason": "ratelimit_backoff", "quantity": 11}, ] - del httpserver.requests[:] + capturing_server.clear_captured() # here we sent a normal event client.capture_event({"type": "transaction"}) client.capture_event({"type": "error", "release": "foo"}) client.flush() - assert len(httpserver.requests) == 2 + assert len(capturing_server.captured) == 2 - event = get_event(httpserver.requests[0]) + event = capturing_server.captured[0].event assert event["type"] == "error" assert event["release"] == "foo" - envelope = get_envelope(httpserver.requests[1]) + envelope = capturing_server.captured[1].envelope assert envelope.items[0].type == "client_report" report = json.loads(envelope.items[0].get_bytes()) assert report["discarded_events"] == [ @@ -264,21 +307,20 @@ def test_data_category_limits_reporting( @pytest.mark.parametrize("response_code", [200, 429]) def test_complex_limits_without_data_category( - httpserver, capsys, caplog, response_code, make_client + capturing_server, capsys, caplog, response_code, make_client ): client = make_client() - httpserver.serve_content( - "hm", - response_code, + capturing_server.respond_with( + code=response_code, headers={"X-Sentry-Rate-Limits": "4711::organization"}, ) client.capture_event({"type": "transaction"}) client.flush() - assert len(httpserver.requests) == 1 - assert httpserver.requests[0].url.endswith("/api/132/envelope/") - del httpserver.requests[:] + assert len(capturing_server.captured) == 1 + assert capturing_server.captured[0].path == "/api/132/envelope/" + capturing_server.clear_captured() assert set(client.transport._disabled_until) == set([None]) @@ -287,4 +329,4 @@ def test_complex_limits_without_data_category( client.capture_event({"type": "event"}) client.flush() - assert len(httpserver.requests) == 0 + assert len(capturing_server.captured) == 0 From aad8efdae109cff16fff6a5d58c81e3f7c71ea32 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 11:07:48 +0200 Subject: [PATCH 19/29] fix: lint --- tests/test_transport.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index a2860c1a1c..60b331dcd0 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -9,6 +9,7 @@ from datetime import datetime, timedelta import pytest +from collections import namedtuple from werkzeug.wrappers import Request, Response from pytest_localserver.http import WSGIServer @@ -19,11 +20,7 @@ from sentry_sdk.integrations.logging import LoggingIntegration -class CapturedData(object): - def __init__(self, path, event=None, envelope=None): - self.path = path - self.event = event - self.envelope = envelope +CapturedData = namedtuple("CapturedData", ["path", "event", "envelope"]) class CapturingServer(WSGIServer): From f4a8552fc7c955b4089d2e07dd03dc510363eba5 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 11:24:59 +0200 Subject: [PATCH 20/29] fix: some python 3 stuff --- tests/test_transport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index 60b331dcd0..81f3dd4247 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -45,7 +45,7 @@ def __call__(self, environ, start_response): request = Request(environ) event = envelope = None if request.mimetype == "application/json": - event = json.load(gzip.GzipFile(fileobj=io.BytesIO(request.data))) + event = json.loads(gzip.GzipFile(fileobj=io.BytesIO(request.data)).read()) else: envelope = Envelope.deserialize_from( gzip.GzipFile(fileobj=io.BytesIO(request.data)) From ccec734895c6615806c91e4d4bf7e5e183058820 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 13:20:28 +0200 Subject: [PATCH 21/29] fix: make envelope parsing work on all python 3 versions --- sentry_sdk/envelope.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/envelope.py b/sentry_sdk/envelope.py index 65d8f9e56d..35ca2eed69 100644 --- a/sentry_sdk/envelope.py +++ b/sentry_sdk/envelope.py @@ -2,7 +2,7 @@ import json import mimetypes -from sentry_sdk._compat import text_type +from sentry_sdk._compat import text_type, PY2 from sentry_sdk._types import MYPY from sentry_sdk.session import Session from sentry_sdk.utils import json_dumps, capture_internal_exceptions @@ -18,6 +18,13 @@ from sentry_sdk._types import Event, EventDataCategory +def parse_json(data): + # on some python 3 versions this needs to be bytes + if not PY2 and isinstance(data, bytes): + data = data.decode("utf-8", "replace") + return json.loads(data) + + class Envelope(object): def __init__( self, @@ -114,7 +121,7 @@ def deserialize_from( cls, f # type: Any ): # type: (...) -> Envelope - headers = json.loads(f.readline()) + headers = parse_json(f.readline()) items = [] while 1: item = Item.deserialize_from(f) @@ -286,11 +293,11 @@ def deserialize_from( line = f.readline().rstrip() if not line: return None - headers = json.loads(line) + headers = parse_json(line) length = headers["length"] payload = f.read(length) if headers.get("type") in ("event", "transaction"): - rv = cls(headers=headers, payload=PayloadRef(json=json.loads(payload))) + rv = cls(headers=headers, payload=PayloadRef(json=parse_json(payload))) else: rv = cls(headers=headers, payload=payload) f.readline() From dbbad2b0fb35e49df51673cc65281cc19aeeba16 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 13:32:39 +0200 Subject: [PATCH 22/29] fix: lint --- sentry_sdk/envelope.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sentry_sdk/envelope.py b/sentry_sdk/envelope.py index 35ca2eed69..ebb2842000 100644 --- a/sentry_sdk/envelope.py +++ b/sentry_sdk/envelope.py @@ -19,6 +19,7 @@ def parse_json(data): + # type: (Union[bytes, text_type]) -> Any # on some python 3 versions this needs to be bytes if not PY2 and isinstance(data, bytes): data = data.decode("utf-8", "replace") From 768a27eb6168f118c293d3fa563adc9c5110d740 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 13:41:57 +0200 Subject: [PATCH 23/29] fix: python 3 stuff --- tests/test_transport.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index 81f3dd4247..7f95ddc540 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -16,7 +16,7 @@ from sentry_sdk import Hub, Client, add_breadcrumb, capture_message, Scope from sentry_sdk.transport import _parse_rate_limits -from sentry_sdk.envelope import Envelope +from sentry_sdk.envelope import Envelope, parse_json from sentry_sdk.integrations.logging import LoggingIntegration @@ -45,7 +45,7 @@ def __call__(self, environ, start_response): request = Request(environ) event = envelope = None if request.mimetype == "application/json": - event = json.loads(gzip.GzipFile(fileobj=io.BytesIO(request.data)).read()) + event = parse_json(gzip.GzipFile(fileobj=io.BytesIO(request.data)).read()) else: envelope = Envelope.deserialize_from( gzip.GzipFile(fileobj=io.BytesIO(request.data)) From 8f462f6ec20317a7aff47be120c1310c1463a254 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 16:58:29 +0200 Subject: [PATCH 24/29] fix: more old python stuff --- tests/test_transport.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index 7f95ddc540..1bea71f4b9 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -276,7 +276,7 @@ def test_data_category_limits_reporting( envelope = capturing_server.captured[0].envelope assert envelope.items[0].type == "event" assert envelope.items[1].type == "client_report" - report = json.loads(envelope.items[1].get_bytes()) + report = parse_json(envelope.items[1].get_bytes()) assert report["discarded_events"] == [ {"category": "transaction", "reason": "ratelimit_backoff", "quantity": 2}, {"category": "attachment", "reason": "ratelimit_backoff", "quantity": 11}, @@ -296,7 +296,7 @@ def test_data_category_limits_reporting( envelope = capturing_server.captured[1].envelope assert envelope.items[0].type == "client_report" - report = json.loads(envelope.items[0].get_bytes()) + report = parse_json(envelope.items[0].get_bytes()) assert report["discarded_events"] == [ {"category": "transaction", "reason": "ratelimit_backoff", "quantity": 1}, ] From c06be048f0c716c4be45a3d024e21a882b6b7e3c Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 7 Sep 2021 19:05:17 +0200 Subject: [PATCH 25/29] fix: moar lint --- tests/test_transport.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index 1bea71f4b9..2ec283fc00 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -2,7 +2,6 @@ import time import logging import pickle -import json import gzip import io From 7f0c74fffc4a492399c3c29a196638b7dec329f7 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Wed, 8 Sep 2021 10:41:09 +0200 Subject: [PATCH 26/29] fix: race --- tests/test_transport.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index 2ec283fc00..64ac319624 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -243,6 +243,17 @@ def test_data_category_limits_reporting( }, ) + outcomes_enabled = True + real_fetch = client.transport._fetch_pending_client_report + + def intercepting_fetch(*args, **kwargs): + if outcomes_enabled: + return real_fetch(*args, **kwargs) + + monkeypatch.setattr( + client.transport, "_fetch_pending_client_report", intercepting_fetch + ) + client.capture_event({"type": "transaction"}) client.flush() @@ -252,18 +263,18 @@ def test_data_category_limits_reporting( assert set(client.transport._disabled_until) == set(["attachment", "transaction"]) + outcomes_enabled = False client.capture_event({"type": "transaction"}) client.capture_event({"type": "transaction"}) capturing_server.clear_captured() - # now trick the transport to force flush sending out the stats with the - # next envelope - time.sleep(0.2) + # flush out the events but don't flush the client reports + client.flush() client.transport._last_client_report_sent = 0 + outcomes_enabled = True scope = Scope() scope.add_attachment(bytes=b"Hello World", filename="hello.txt") - client.capture_event({"type": "error"}, scope=scope) client.flush() From 3ecc7257354c70824e26af8d75b7de71c82543e0 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Wed, 8 Sep 2021 12:06:34 +0200 Subject: [PATCH 27/29] fix: race more --- tests/test_transport.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index 64ac319624..3d560e551e 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -1,5 +1,4 @@ # coding: utf-8 -import time import logging import pickle import gzip @@ -243,7 +242,7 @@ def test_data_category_limits_reporting( }, ) - outcomes_enabled = True + outcomes_enabled = False real_fetch = client.transport._fetch_pending_client_report def intercepting_fetch(*args, **kwargs): @@ -253,6 +252,8 @@ def intercepting_fetch(*args, **kwargs): monkeypatch.setattr( client.transport, "_fetch_pending_client_report", intercepting_fetch ) + # get rid of threading making things hard to track + monkeypatch.setattr(client.transport._worker, "submit", lambda x: x() or True) client.capture_event({"type": "transaction"}) client.flush() @@ -263,7 +264,6 @@ def intercepting_fetch(*args, **kwargs): assert set(client.transport._disabled_until) == set(["attachment", "transaction"]) - outcomes_enabled = False client.capture_event({"type": "transaction"}) client.capture_event({"type": "transaction"}) capturing_server.clear_captured() From 89b5777212275fcd111e9d5dba97e9279aafe7ce Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sat, 11 Sep 2021 00:14:59 +0200 Subject: [PATCH 28/29] fix: count empty attachments as 1 --- sentry_sdk/transport.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index d843f0267f..bcaebf37b7 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -176,7 +176,9 @@ def record_lost_event( if item is not None: data_category = item.data_category if data_category == "attachment": - quantity = len(item.get_bytes()) + # quantity of 0 is actually 1 as we do not want to count + # empty attachments as actually empty. + quantity = len(item.get_bytes()) or 1 elif data_category is None: raise TypeError("data category not provided") From f63464dea727540cc8dbcd0176fb6fffcda53e89 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 13 Sep 2021 11:24:30 +0200 Subject: [PATCH 29/29] fix: order outcomes before assert --- tests/test_transport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_transport.py b/tests/test_transport.py index 3d560e551e..0ce155e6e6 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -287,7 +287,7 @@ def intercepting_fetch(*args, **kwargs): assert envelope.items[0].type == "event" assert envelope.items[1].type == "client_report" report = parse_json(envelope.items[1].get_bytes()) - assert report["discarded_events"] == [ + assert sorted(report["discarded_events"], key=lambda x: x["quantity"]) == [ {"category": "transaction", "reason": "ratelimit_backoff", "quantity": 2}, {"category": "attachment", "reason": "ratelimit_backoff", "quantity": 11}, ]