From 9d60a9ac3045f6279166117eed154f9f38cc49b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 30 Dec 2024 17:25:07 +0100 Subject: [PATCH 1/6] Make the default retry policy the aggressive one with half the attempts --- docs/use/api.rst | 25 +++++----- tests/test_retry.py | 115 +++++++++++++++----------------------------- zyte_api/_retry.py | 97 ++++++++++++++++++------------------- 3 files changed, 98 insertions(+), 139 deletions(-) diff --git a/docs/use/api.rst b/docs/use/api.rst index 8e3d34e..c6983bf 100644 --- a/docs/use/api.rst +++ b/docs/use/api.rst @@ -148,32 +148,29 @@ retries for :ref:`rate-limiting ` and :ref:`unsuccessful .. _default-retry-policy: The default retry policy, :data:`~zyte_api.zyte_api_retrying`, does the -following: +following for each request: - Retries :ref:`rate-limiting responses ` forever. -- Retries :ref:`temporary download errors - ` up to 3 times. +- Retries :ref:`temporary download errors ` + up to 3 times. :ref:`Permanent download errors + ` also count towards this retry limit. + +- Retries permanent download errors up to 3 times per request. - Retries network errors until they have happened for 15 minutes straight. +- Retries error responses with an HTTP status code in the 500-599 range (503, + 520 and 521 excluded) up to 3 times. + All retries are done with an exponential backoff algorithm. .. _aggressive-retry-policy: If some :ref:`unsuccessful responses ` exceed maximum retries with the default retry policy, try using -:data:`~zyte_api.aggressive_retrying` instead, which modifies the default retry -policy as follows: - -- Temporary download error are retried 7 times. :ref:`Permanent download - errors ` also count towards this retry - limit. - -- Retries permanent download errors up to 3 times. - -- Retries error responses with an HTTP status code in the 500-599 range (503, - 520 and 521 excluded) up to 3 times. +:data:`~zyte_api.aggressive_retrying` instead, which duplicates attempts for +all retry scenarios. Alternatively, the reference documentation of :class:`~zyte_api.RetryFactory` and :class:`~zyte_api.AggressiveRetryFactory` features some examples of custom diff --git a/tests/test_retry.py b/tests/test_retry.py index ef8f2d0..86e0293 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -140,6 +140,15 @@ def __init__(self, time): self.time = time +class scale: + + def __init__(self, factor): + self.factor = factor + + def __call__(self, number, add=0): + return int(number * self.factor) + add + + @pytest.mark.parametrize( ("retrying", "outcomes", "exhausted"), ( @@ -237,81 +246,36 @@ def __init__(self, time): ), ) ), - # Behaviors specific to the default retry policy + # Scaled behaviors, where the default retry policy uses half as many + # attempts as the aggressive retry policy. *( - (zyte_api_retrying, outcomes, exhausted) - for outcomes, exhausted in ( - # Temporary download errors are retried until they have - # happened 4 times in total. - ( - (mock_request_error(status=520),) * 3, - False, - ), - ( - (mock_request_error(status=520),) * 4, - True, - ), - ( - ( - *(mock_request_error(status=429),) * 2, - mock_request_error(status=520), - ), - False, - ), - ( - ( - *(mock_request_error(status=429),) * 3, - mock_request_error(status=520), - ), - False, - ), - ( - ( - *( - mock_request_error(status=429), - mock_request_error(status=520), - ) - * 3, - ), - False, - ), - ( - ( - *( - mock_request_error(status=429), - mock_request_error(status=520), - ) - * 4, - ), - True, - ), + (retrying, outcomes, exhausted) + for retrying, scaled in ( + (zyte_api_retrying, scale(0.5)), + (aggressive_retrying, scale(1)), ) - ), - # Behaviors specific to the aggressive retry policy - *( - (aggressive_retrying, outcomes, exhausted) for outcomes, exhausted in ( # Temporary download errors are retried until they have - # happened 8 times in total. Permanent download errors also - # count towards that limit. + # happened 8*factor times in total. Permanent download errors + # also count towards that limit. ( - (mock_request_error(status=520),) * 7, + (mock_request_error(status=520),) * scaled(8, -1), False, ), ( - (mock_request_error(status=520),) * 8, + (mock_request_error(status=520),) * scaled(8), True, ), ( ( - *(mock_request_error(status=429),) * 6, + *(mock_request_error(status=429),) * scaled(8, -2), mock_request_error(status=520), ), False, ), ( ( - *(mock_request_error(status=429),) * 7, + *(mock_request_error(status=429),) * scaled(8, -1), mock_request_error(status=520), ), False, @@ -322,7 +286,7 @@ def __init__(self, time): mock_request_error(status=429), mock_request_error(status=520), ) - * 7, + * scaled(8, -1), ), False, ), @@ -332,13 +296,13 @@ def __init__(self, time): mock_request_error(status=429), mock_request_error(status=520), ) - * 8, + * scaled(8), ), True, ), ( ( - *(mock_request_error(status=520),) * 5, + *(mock_request_error(status=520),) * scaled(8, -3), *(mock_request_error(status=521),) * 1, *(mock_request_error(status=520),) * 1, ), @@ -346,7 +310,7 @@ def __init__(self, time): ), ( ( - *(mock_request_error(status=520),) * 6, + *(mock_request_error(status=520),) * scaled(8, -2), *(mock_request_error(status=521),) * 1, *(mock_request_error(status=520),) * 1, ), @@ -354,29 +318,30 @@ def __init__(self, time): ), ( ( - *(mock_request_error(status=520),) * 6, + *(mock_request_error(status=520),) * scaled(8, -2), *(mock_request_error(status=521),) * 1, ), False, ), ( ( - *(mock_request_error(status=520),) * 7, + *(mock_request_error(status=520),) * scaled(8, -1), *(mock_request_error(status=521),) * 1, ), True, ), # Permanent download errors are retried until they have - # happened 4 times in total. + # happened 4*factor times in total. ( - (*(mock_request_error(status=521),) * 3,), + (*(mock_request_error(status=521),) * scaled(4, -1),), False, ), ( - (*(mock_request_error(status=521),) * 4,), + (*(mock_request_error(status=521),) * scaled(4),), True, ), - # Undocumented 5xx errors are retried up to 3 times. + # Undocumented 5xx errors are retried until they have happened + # 4*factor times. *( scenario for status in ( @@ -386,16 +351,16 @@ def __init__(self, time): ) for scenario in ( ( - (*(mock_request_error(status=status),) * 3,), + (*(mock_request_error(status=status),) * scaled(4, -1),), False, ), ( - (*(mock_request_error(status=status),) * 4,), + (*(mock_request_error(status=status),) * scaled(4),), True, ), ( ( - *(mock_request_error(status=status),) * 2, + *(mock_request_error(status=status),) * scaled(4, -2), mock_request_error(status=429), mock_request_error(status=503), ServerConnectionError(), @@ -405,7 +370,7 @@ def __init__(self, time): ), ( ( - *(mock_request_error(status=status),) * 3, + *(mock_request_error(status=status),) * scaled(4, -1), mock_request_error(status=429), mock_request_error(status=503), ServerConnectionError(), @@ -415,17 +380,15 @@ def __init__(self, time): ), ( ( - mock_request_error(status=status), mock_request_error(status=555), - mock_request_error(status=status), + *(mock_request_error(status=status),) * scaled(4, -2), ), False, ), ( ( - mock_request_error(status=status), mock_request_error(status=555), - *(mock_request_error(status=status),) * 2, + *(mock_request_error(status=status),) * scaled(4, -1), ), True, ), @@ -464,7 +427,7 @@ async def run(): try: await run() except Exception as outcome: - assert exhausted + assert exhausted, outcome assert outcome is last_outcome else: assert not exhausted diff --git a/zyte_api/_retry.py b/zyte_api/_retry.py index 3b01c0e..90bdcad 100644 --- a/zyte_api/_retry.py +++ b/zyte_api/_retry.py @@ -54,10 +54,6 @@ def _is_throttling_error(exc: BaseException) -> bool: return isinstance(exc, RequestError) and exc.status in (429, 503) -def _is_temporary_download_error(exc: BaseException) -> bool: - return isinstance(exc, RequestError) and exc.status == 520 - - class stop_on_count(stop_base): """Keep a call count with the specified counter name, and stop after the specified number os calls. @@ -128,6 +124,42 @@ def __call__(self, retry_state: "RetryCallState") -> bool: return True +class stop_on_download_error(stop_base): + """Stop after the specified max numbers of total or permanent download + errors.""" + + def __init__(self, max_total: int, max_permanent: int) -> None: + self._max_total = max_total + self._max_permanent = max_permanent + + def __call__(self, retry_state: "RetryCallState") -> bool: + if not hasattr(retry_state, "counter"): + retry_state.counter = Counter() # type: ignore + assert retry_state.outcome, "Unexpected empty outcome" + exc = retry_state.outcome.exception() + assert exc, "Unexpected empty exception" + if exc.status == 521: # type: ignore + retry_state.counter["permanent_download_error"] += 1 # type: ignore + if retry_state.counter["permanent_download_error"] >= self._max_permanent: # type: ignore + return True + retry_state.counter["download_error"] += 1 # type: ignore + if retry_state.counter["download_error"] >= self._max_total: # type: ignore + return True + return False + + +def _download_error(exc: BaseException) -> bool: + return isinstance(exc, RequestError) and exc.status in {520, 521} + + +def _undocumented_error(exc: BaseException) -> bool: + return ( + isinstance(exc, RequestError) + and exc.status >= 500 + and exc.status not in {503, 520, 521} + ) + + class RetryFactory: """Factory class that builds the :class:`tenacity.AsyncRetrying` object that defines the :ref:`default retry policy `. @@ -160,7 +192,8 @@ class CustomRetryFactory(RetryFactory): retry_condition: retry_base = ( retry_if_exception(_is_throttling_error) | retry_if_exception(_is_network_error) - | retry_if_exception(_is_temporary_download_error) + | retry_if_exception(_download_error) + | retry_if_exception(_undocumented_error) ) # throttling throttling_wait = wait_chain( @@ -182,7 +215,10 @@ class CustomRetryFactory(RetryFactory): temporary_download_error_wait = network_error_wait throttling_stop = stop_never network_error_stop = stop_after_uninterrupted_delay(15 * 60) - temporary_download_error_stop = stop_on_count(4) + temporary_download_error_stop = stop_on_download_error(max_total=4, max_permanent=2) + + undocumented_error_stop = stop_on_count(2) + undocumented_error_wait = network_error_wait def wait(self, retry_state: RetryCallState) -> float: assert retry_state.outcome, "Unexpected empty outcome" @@ -192,7 +228,9 @@ def wait(self, retry_state: RetryCallState) -> float: return self.throttling_wait(retry_state=retry_state) if _is_network_error(exc): return self.network_error_wait(retry_state=retry_state) - assert _is_temporary_download_error(exc) # See retry_condition + if _undocumented_error(exc): + return self.undocumented_error_wait(retry_state=retry_state) + assert _download_error(exc) # See retry_condition return self.temporary_download_error_wait(retry_state=retry_state) def stop(self, retry_state: RetryCallState) -> bool: @@ -203,7 +241,9 @@ def stop(self, retry_state: RetryCallState) -> bool: return self.throttling_stop(retry_state) if _is_network_error(exc): return self.network_error_stop(retry_state) - assert _is_temporary_download_error(exc) # See retry_condition + if _undocumented_error(exc): + return self.undocumented_error_stop(retry_state) + assert _download_error(exc) # See retry_condition return self.temporary_download_error_stop(retry_state) def reraise(self) -> bool: @@ -224,42 +264,6 @@ def build(self) -> AsyncRetrying: zyte_api_retrying: AsyncRetrying = RetryFactory().build() -def _download_error(exc: BaseException) -> bool: - return isinstance(exc, RequestError) and exc.status in {520, 521} - - -def _undocumented_error(exc: BaseException) -> bool: - return ( - isinstance(exc, RequestError) - and exc.status >= 500 - and exc.status not in {503, 520, 521} - ) - - -class stop_on_download_error(stop_base): - """Stop after the specified max numbers of total or permanent download - errors.""" - - def __init__(self, max_total: int, max_permanent: int) -> None: - self._max_total = max_total - self._max_permanent = max_permanent - - def __call__(self, retry_state: "RetryCallState") -> bool: - if not hasattr(retry_state, "counter"): - retry_state.counter = Counter() # type: ignore - assert retry_state.outcome, "Unexpected empty outcome" - exc = retry_state.outcome.exception() - assert exc, "Unexpected empty exception" - if exc.status == 521: # type: ignore - retry_state.counter["permanent_download_error"] += 1 # type: ignore - if retry_state.counter["permanent_download_error"] >= self._max_permanent: # type: ignore - return True - retry_state.counter["download_error"] += 1 # type: ignore - if retry_state.counter["download_error"] >= self._max_total: # type: ignore - return True - return False - - class AggressiveRetryFactory(RetryFactory): """Factory class that builds the :class:`tenacity.AsyncRetrying` object that defines the :ref:`aggressive retry policy `. @@ -300,7 +304,6 @@ class CustomRetryFactory(AggressiveRetryFactory): download_error_wait = RetryFactory.temporary_download_error_wait undocumented_error_stop = stop_on_count(4) - undocumented_error_wait = RetryFactory.temporary_download_error_wait def stop(self, retry_state: RetryCallState) -> bool: assert retry_state.outcome, "Unexpected empty outcome" @@ -308,8 +311,6 @@ def stop(self, retry_state: RetryCallState) -> bool: assert exc, "Unexpected empty exception" if _download_error(exc): return self.download_error_stop(retry_state) - if _undocumented_error(exc): - return self.undocumented_error_stop(retry_state) return super().stop(retry_state) def wait(self, retry_state: RetryCallState) -> float: @@ -318,8 +319,6 @@ def wait(self, retry_state: RetryCallState) -> float: assert exc, "Unexpected empty exception" if _download_error(exc): return self.download_error_wait(retry_state) - if _undocumented_error(exc): - return self.undocumented_error_wait(retry_state=retry_state) return super().wait(retry_state) From d816d4ac699234707bb7127202f97bdf10c311c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 31 Dec 2024 10:46:34 +0100 Subject: [PATCH 2/6] Update docs/use/api.rst Co-authored-by: Mikhail Korobov --- docs/use/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/use/api.rst b/docs/use/api.rst index c6983bf..59f25a0 100644 --- a/docs/use/api.rst +++ b/docs/use/api.rst @@ -169,7 +169,7 @@ All retries are done with an exponential backoff algorithm. If some :ref:`unsuccessful responses ` exceed maximum retries with the default retry policy, try using -:data:`~zyte_api.aggressive_retrying` instead, which duplicates attempts for +:data:`~zyte_api.aggressive_retrying` instead, which doubles attempts for all retry scenarios. Alternatively, the reference documentation of :class:`~zyte_api.RetryFactory` From 26f21d0373af60a5d1ab11bb50016d723efb684d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 31 Dec 2024 10:48:13 +0100 Subject: [PATCH 3/6] Update api.rst --- docs/use/api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/use/api.rst b/docs/use/api.rst index 59f25a0..3b6cda4 100644 --- a/docs/use/api.rst +++ b/docs/use/api.rst @@ -156,12 +156,12 @@ following for each request: up to 3 times. :ref:`Permanent download errors ` also count towards this retry limit. -- Retries permanent download errors up to 3 times per request. +- Retries permanent download errors once per request. - Retries network errors until they have happened for 15 minutes straight. - Retries error responses with an HTTP status code in the 500-599 range (503, - 520 and 521 excluded) up to 3 times. + 520 and 521 excluded) once. All retries are done with an exponential backoff algorithm. From 1d6645eff5c4d66c66882d6c5eb3b54efa955206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 31 Dec 2024 10:48:52 +0100 Subject: [PATCH 4/6] Update api.rst --- docs/use/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/use/api.rst b/docs/use/api.rst index 3b6cda4..cc1f488 100644 --- a/docs/use/api.rst +++ b/docs/use/api.rst @@ -156,7 +156,7 @@ following for each request: up to 3 times. :ref:`Permanent download errors ` also count towards this retry limit. -- Retries permanent download errors once per request. +- Retries permanent download errors once. - Retries network errors until they have happened for 15 minutes straight. From d966adb74bdfef3dd676452484720af4dcaee3c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Sun, 16 Feb 2025 22:33:19 +0100 Subject: [PATCH 5/6] Remove the temporary_ prefix from methods that now also handle permanent download errors --- CHANGES.rst | 13 +++++++++++++ tests/test_retry.py | 2 +- zyte_api/_retry.py | 41 ++++++++--------------------------------- 3 files changed, 22 insertions(+), 34 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index fb5fff1..9ca2551 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,19 @@ Changes ======= +Unreleased +---------- + +* **Backward-incompatible:** Renamed some methods of + :class:`~.RetryFactory` for consistency, since they now handle both temporary + and permanent download errors: + + * ``temporary_download_error_stop`` → + :meth:`~.RetryFactory.download_error_stop` + + * ``temporary_download_error_wait`` → + :meth:`~.RetryFactory.download_error_wait` + 0.6.0 (2024-05-29) ------------------ diff --git a/tests/test_retry.py b/tests/test_retry.py index 12c6e72..fa2ce03 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -66,7 +66,7 @@ def broken_stop(_): ("retry_factory", "status", "waiter"), [ (RetryFactory, 429, "throttling"), - (RetryFactory, 520, "temporary_download_error"), + (RetryFactory, 520, "download_error"), (AggressiveRetryFactory, 429, "throttling"), (AggressiveRetryFactory, 500, "undocumented_error"), (AggressiveRetryFactory, 520, "download_error"), diff --git a/zyte_api/_retry.py b/zyte_api/_retry.py index 32392d6..1df83a2 100644 --- a/zyte_api/_retry.py +++ b/zyte_api/_retry.py @@ -166,22 +166,21 @@ class RetryFactory: modify it as needed, and then call :meth:`build` on your subclass to get the corresponding :class:`tenacity.AsyncRetrying` object. - For example, to double the number of attempts for :ref:`temporary - download errors ` and the time network - errors are retried: + For example, to double the number of attempts for download errors and the + time network errors are retried: .. code-block:: python from zyte_api import ( RetryFactory, stop_after_uninterrupted_delay, - stop_on_count, + stop_on_download_error, ) class CustomRetryFactory(RetryFactory): network_error_stop = stop_after_uninterrupted_delay(30 * 60) - temporary_download_error_stop = stop_on_count(8) + download_error_stop = stop_on_download_error(max_total=8, max_permanent=4) CUSTOM_RETRY_POLICY = CustomRetryFactory().build() @@ -209,10 +208,10 @@ class CustomRetryFactory(RetryFactory): # wait from 3s to ~1m wait_random(3, 7) + wait_random_exponential(multiplier=1, max=55) ) - temporary_download_error_wait = network_error_wait + download_error_wait = network_error_wait throttling_stop = stop_never network_error_stop = stop_after_uninterrupted_delay(15 * 60) - temporary_download_error_stop = stop_on_download_error(max_total=4, max_permanent=2) + download_error_stop = stop_on_download_error(max_total=4, max_permanent=2) undocumented_error_stop = stop_on_count(2) undocumented_error_wait = network_error_wait @@ -228,7 +227,7 @@ def wait(self, retry_state: RetryCallState) -> float: if _undocumented_error(exc): return self.undocumented_error_wait(retry_state=retry_state) assert _download_error(exc) # See retry_condition - return self.temporary_download_error_wait(retry_state=retry_state) + return self.download_error_wait(retry_state=retry_state) def stop(self, retry_state: RetryCallState) -> bool: assert retry_state.outcome, "Unexpected empty outcome" @@ -241,7 +240,7 @@ def stop(self, retry_state: RetryCallState) -> bool: if _undocumented_error(exc): return self.undocumented_error_stop(retry_state) assert _download_error(exc) # See retry_condition - return self.temporary_download_error_stop(retry_state) + return self.download_error_stop(retry_state) def reraise(self) -> bool: return True @@ -291,32 +290,8 @@ class CustomRetryFactory(AggressiveRetryFactory): CUSTOM_RETRY_POLICY = CustomRetryFactory().build() """ - retry_condition = ( - RetryFactory.retry_condition - | retry_if_exception(_download_error) - | retry_if_exception(_undocumented_error) - ) - download_error_stop = stop_on_download_error(max_total=8, max_permanent=4) - download_error_wait = RetryFactory.temporary_download_error_wait - undocumented_error_stop = stop_on_count(4) - def stop(self, retry_state: RetryCallState) -> bool: - assert retry_state.outcome, "Unexpected empty outcome" - exc = retry_state.outcome.exception() - assert exc, "Unexpected empty exception" - if _download_error(exc): - return self.download_error_stop(retry_state) - return super().stop(retry_state) - - def wait(self, retry_state: RetryCallState) -> float: - assert retry_state.outcome, "Unexpected empty outcome" - exc = retry_state.outcome.exception() - assert exc, "Unexpected empty exception" - if _download_error(exc): - return self.download_error_wait(retry_state) - return super().wait(retry_state) - aggressive_retrying = AggressiveRetryFactory().build() From 4bacd05490852b8dd811c44131bdee357ff820a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Sun, 16 Feb 2025 22:38:59 +0100 Subject: [PATCH 6/6] Restore mypy skips --- zyte_api/_retry.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/zyte_api/_retry.py b/zyte_api/_retry.py index 1df83a2..994ce6c 100644 --- a/zyte_api/_retry.py +++ b/zyte_api/_retry.py @@ -134,16 +134,16 @@ def __init__(self, max_total: int, max_permanent: int) -> None: def __call__(self, retry_state: RetryCallState) -> bool: if not hasattr(retry_state, "counter"): - retry_state.counter = Counter() + retry_state.counter = Counter() # type: ignore[attr-defined] assert retry_state.outcome, "Unexpected empty outcome" exc = retry_state.outcome.exception() assert exc, "Unexpected empty exception" - if exc.status == 521: - retry_state.counter["permanent_download_error"] += 1 - if retry_state.counter["permanent_download_error"] >= self._max_permanent: + if exc.status == 521: # type: ignore[attr-defined] + retry_state.counter["permanent_download_error"] += 1 # type: ignore[attr-defined] + if retry_state.counter["permanent_download_error"] >= self._max_permanent: # type: ignore[attr-defined] return True - retry_state.counter["download_error"] += 1 - return retry_state.counter["download_error"] >= self._max_total + retry_state.counter["download_error"] += 1 # type: ignore[attr-defined] + return retry_state.counter["download_error"] >= self._max_total # type: ignore[attr-defined] def _download_error(exc: BaseException) -> bool: