diff --git a/docs/ref/api.rst b/docs/ref/api.rst index 5a6c4ab..70164ed 100644 --- a/docs/ref/api.rst +++ b/docs/ref/api.rst @@ -26,8 +26,13 @@ Retries .. autodata:: zyte_api_retrying :no-value: +.. autodata:: aggressive_retrying + :no-value: + .. autoclass:: RetryFactory +.. autoclass:: AggressiveRetryFactory + Errors ====== diff --git a/docs/use/api.rst b/docs/use/api.rst index 4f1522a..7d05a47 100644 --- a/docs/use/api.rst +++ b/docs/use/api.rst @@ -152,20 +152,43 @@ following: - Retries :ref:`rate-limiting responses ` forever. -- Retries :ref:`unsuccessful responses ` up - to 3 times. +- Retries :ref:`temporary download errors + ` up to 3 times. -- Retries network errors for up to 15 minutes. +- Retries network errors until they have happened for 15 minutes straight. All retries are done with an exponential backoff algorithm. -To customize the retry policy, create your own :class:`~tenacity.AsyncRetrying` -object, e.g. using a custom subclass of :data:`~zyte_api.RetryFactory`, and -pass it when creating your client object: +.. _aggressive-retry-policy: + +If some :ref:`unsuccessful responses ` exceed +maximum retries with the default retry policy, try using +:data:`~zyte_api.aggressive_retrying` instead, which modifies the default retry +policy as follows: + +- Temporary download errors are retried up to 7 times. :ref:`Permanent download + errors ` also count towards this retry + limit. + +- Retries permanent download errors up to 3 times. + +- Retries error responses with an HTTP status code in the 500-599 range (503, + 520 and 521 excluded) up to 3 times. + +Alternatively, the reference documentation of :class:`~zyte_api.RetryFactory` +and :class:`~zyte_api.AggressiveRetryFactory` features some examples of custom +retry policies, and you can always build your own +:class:`~tenacity.AsyncRetrying` object from scratch. 
+ +To use :data:`~zyte_api.aggressive_retrying` or a custom retry policy, pass an +instance of your :class:`~tenacity.AsyncRetrying` subclass when creating your +client object: .. code-block:: python - client = ZyteAPI(retrying=custom_retry_policy) + from zyte_api import ZyteAPI, aggressive_retrying + + client = ZyteAPI(retrying=aggressive_retrying) When retries are exceeded for a given request, an exception is raised. Except for the :meth:`~ZyteAPI.iter` method of the :ref:`sync API `, which diff --git a/tests/mockserver.py b/tests/mockserver.py index 40b1d6e..023b72f 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -86,6 +86,9 @@ def render_POST(self, request): request.setResponseCode(429) response_data = {"status": 429, "type": "/limits/over-user-limit"} return json.dumps(response_data).encode() + if domain == "e500.example": + request.setResponseCode(500) + return "" if domain == "e520.example": request.setResponseCode(520) response_data = {"status": 520, "type": "/download/temporary-error"} diff --git a/tests/test_async.py b/tests/test_async.py index 2110f63..3f33ce7 100644 --- a/tests/test_async.py +++ b/tests/test_async.py @@ -2,17 +2,13 @@ from unittest.mock import AsyncMock import pytest -from tenacity import AsyncRetrying -from zyte_api import AsyncZyteAPI, RequestError -from zyte_api._retry import RetryFactory +from zyte_api import AggressiveRetryFactory, AsyncZyteAPI, RequestError from zyte_api.aio.client import AsyncClient from zyte_api.apikey import NoApiKey from zyte_api.errors import ParsedError from zyte_api.utils import USER_AGENT -from .mockserver import DropResource, MockServer - @pytest.mark.parametrize( ("client_cls",), @@ -72,46 +68,6 @@ async def test_get(client_cls, get_method, mockserver): assert actual_result == expected_result -UNSET = object() - - -class OutlierException(RuntimeError): - pass - - -@pytest.mark.parametrize( - ("client_cls", "get_method"), - ( - (AsyncZyteAPI, "get"), - (AsyncClient, "request_raw"), - ), -) 
-@pytest.mark.parametrize( - ("value", "exception"), - ( - (UNSET, OutlierException), - (True, OutlierException), - (False, RequestError), - ), -) -@pytest.mark.asyncio -async def test_get_handle_retries(client_cls, get_method, value, exception, mockserver): - kwargs = {} - if value is not UNSET: - kwargs["handle_retries"] = value - - def broken_stop(_): - raise OutlierException - - retrying = AsyncRetrying(stop=broken_stop) - client = client_cls(api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying) - with pytest.raises(exception): - await getattr(client, get_method)( - {"url": "https://exception.example", "browserHtml": True}, - **kwargs, - ) - - @pytest.mark.parametrize( ("client_cls", "get_method"), ( @@ -234,132 +190,6 @@ async def test_iter(client_cls, iter_method, mockserver): assert actual_result in expected_results -@pytest.mark.parametrize( - ("client_cls", "get_method"), - ( - (AsyncZyteAPI, "get"), - (AsyncClient, "request_raw"), - ), -) -@pytest.mark.parametrize( - ("subdomain", "waiter"), - ( - ("e429", "throttling"), - ("e520", "temporary_download_error"), - ), -) -@pytest.mark.asyncio -async def test_retry_wait(client_cls, get_method, subdomain, waiter, mockserver): - def broken_wait(self, retry_state): - raise OutlierException - - class CustomRetryFactory(RetryFactory): - pass - - setattr(CustomRetryFactory, f"{waiter}_wait", broken_wait) - - retrying = CustomRetryFactory().build() - client = client_cls(api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying) - with pytest.raises(OutlierException): - await getattr(client, get_method)( - {"url": f"https://{subdomain}.example", "browserHtml": True}, - ) - - -@pytest.mark.parametrize( - ("client_cls", "get_method"), - ( - (AsyncZyteAPI, "get"), - (AsyncClient, "request_raw"), - ), -) -@pytest.mark.asyncio -async def test_retry_wait_network_error(client_cls, get_method): - waiter = "network_error" - - def broken_wait(self, retry_state): - raise OutlierException - - class 
CustomRetryFactory(RetryFactory): - pass - - setattr(CustomRetryFactory, f"{waiter}_wait", broken_wait) - - retrying = CustomRetryFactory().build() - with MockServer(resource=DropResource) as mockserver: - client = client_cls( - api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying - ) - with pytest.raises(OutlierException): - await getattr(client, get_method)( - {"url": "https://example.com", "browserHtml": True}, - ) - - -@pytest.mark.parametrize( - ("client_cls", "get_method"), - ( - (AsyncZyteAPI, "get"), - (AsyncClient, "request_raw"), - ), -) -@pytest.mark.parametrize( - ("subdomain", "stopper"), - ( - ("e429", "throttling"), - ("e520", "temporary_download_error"), - ), -) -@pytest.mark.asyncio -async def test_retry_stop(client_cls, get_method, subdomain, stopper, mockserver): - def broken_stop(self, retry_state): - raise OutlierException - - class CustomRetryFactory(RetryFactory): - def wait(self, retry_state): - return None - - setattr(CustomRetryFactory, f"{stopper}_stop", broken_stop) - - retrying = CustomRetryFactory().build() - client = client_cls(api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying) - with pytest.raises(OutlierException): - await getattr(client, get_method)( - {"url": f"https://{subdomain}.example", "browserHtml": True}, - ) - - -@pytest.mark.parametrize( - ("client_cls", "get_method"), - ( - (AsyncZyteAPI, "get"), - (AsyncClient, "request_raw"), - ), -) -@pytest.mark.asyncio -async def test_retry_stop_network_error(client_cls, get_method): - stopper = "network_error" - - def broken_stop(self, retry_state): - raise OutlierException - - class CustomRetryFactory(RetryFactory): - def wait(self, retry_state): - return None - - setattr(CustomRetryFactory, f"{stopper}_stop", broken_stop) - - retrying = CustomRetryFactory().build() - with MockServer(resource=DropResource) as mockserver: - client = client_cls( - api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying - ) - with pytest.raises(OutlierException): - 
await getattr(client, get_method)( - {"url": "https://example.com", "browserHtml": True}, - ) - - @pytest.mark.parametrize( ("client_cls", "get_method", "iter_method"), ( @@ -482,3 +312,10 @@ async def test_session_no_context_manager(mockserver): assert Exception in expected_results else: assert actual_result in expected_results + + +def test_retrying_class(): + """A descriptive exception is raised when creating a client with an + AsyncRetrying subclass or similar instead of an instance of it.""" + with pytest.raises(ValueError): + AsyncZyteAPI(api_key="foo", retrying=AggressiveRetryFactory) diff --git a/tests/test_retry.py b/tests/test_retry.py index 21fc41d..ef8f2d0 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -1,3 +1,23 @@ +from collections import deque +from copy import copy +from unittest.mock import patch + +import pytest +from aiohttp.client_exceptions import ServerConnectionError +from tenacity import AsyncRetrying + +from zyte_api import ( + AggressiveRetryFactory, + AsyncZyteAPI, + RequestError, + RetryFactory, + aggressive_retrying, + zyte_api_retrying, +) + +from .mockserver import DropResource, MockServer + + def test_deprecated_imports(): from zyte_api import RetryFactory, zyte_api_retrying from zyte_api.aio.retry import RetryFactory as DeprecatedRetryFactory @@ -5,3 +25,446 @@ def test_deprecated_imports(): assert RetryFactory is DeprecatedRetryFactory assert zyte_api_retrying is deprecated_zyte_api_retrying + + +UNSET = object() + + +class OutlierException(RuntimeError): + pass + + +@pytest.mark.parametrize( + ("value", "exception"), + ( + (UNSET, OutlierException), + (True, OutlierException), + (False, RequestError), + ), +) +@pytest.mark.asyncio +async def test_get_handle_retries(value, exception, mockserver): + kwargs = {} + if value is not UNSET: + kwargs["handle_retries"] = value + + def broken_stop(_): + raise OutlierException + + retrying = AsyncRetrying(stop=broken_stop) + client = AsyncZyteAPI( + api_key="a", 
api_url=mockserver.urljoin("/"), retrying=retrying + ) + with pytest.raises(exception): + await client.get( + {"url": "https://exception.example", "browserHtml": True}, + **kwargs, + ) + + +@pytest.mark.parametrize( + ("retry_factory", "status", "waiter"), + ( + (RetryFactory, 429, "throttling"), + (RetryFactory, 520, "temporary_download_error"), + (AggressiveRetryFactory, 429, "throttling"), + (AggressiveRetryFactory, 500, "undocumented_error"), + (AggressiveRetryFactory, 520, "download_error"), + ), +) +@pytest.mark.asyncio +async def test_retry_wait(retry_factory, status, waiter, mockserver): + def broken_wait(self, retry_state): + raise OutlierException + + class CustomRetryFactory(retry_factory): + pass + + setattr(CustomRetryFactory, f"{waiter}_wait", broken_wait) + retrying = CustomRetryFactory().build() + client = AsyncZyteAPI( + api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying + ) + with pytest.raises(OutlierException): + await client.get( + {"url": f"https://e{status}.example", "browserHtml": True}, + ) + + +@pytest.mark.parametrize( + ("retry_factory",), + ( + (RetryFactory,), + (AggressiveRetryFactory,), + ), +) +@pytest.mark.asyncio +async def test_retry_wait_network_error(retry_factory): + waiter = "network_error" + + def broken_wait(self, retry_state): + raise OutlierException + + class CustomRetryFactory(retry_factory): + pass + + setattr(CustomRetryFactory, f"{waiter}_wait", broken_wait) + + retrying = CustomRetryFactory().build() + with MockServer(resource=DropResource) as mockserver: + client = AsyncZyteAPI( + api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying + ) + with pytest.raises(OutlierException): + await client.get( + {"url": "https://example.com", "browserHtml": True}, + ) + + +def mock_request_error(*, status=200): + return RequestError( + history=None, + request_info=None, + response_content=None, + status=status, + query={}, + ) + + +# Number of times to test request errors that must be retried forever. 
+FOREVER_TIMES = 100 + + +class fast_forward: + def __init__(self, time): + self.time = time + + +@pytest.mark.parametrize( + ("retrying", "outcomes", "exhausted"), + ( + # Shared behaviors of all retry policies + *( + (retrying, outcomes, exhausted) + for retrying in (zyte_api_retrying, aggressive_retrying) + for outcomes, exhausted in ( + # Rate limiting is retried forever. + ( + (mock_request_error(status=429),) * FOREVER_TIMES, + False, + ), + ( + (mock_request_error(status=503),) * FOREVER_TIMES, + False, + ), + # Network errors are retried until there have only been network + # errors (of any kind) for 15 minutes straight or more. + ( + ( + ServerConnectionError(), + fast_forward(15 * 60 - 1), + ServerConnectionError(), + ), + False, + ), + ( + ( + ServerConnectionError(), + fast_forward(15 * 60), + ServerConnectionError(), + ), + True, + ), + ( + ( + mock_request_error(status=429), + fast_forward(15 * 60 - 1), + ServerConnectionError(), + ), + False, + ), + ( + ( + mock_request_error(status=429), + fast_forward(15 * 60), + ServerConnectionError(), + ), + False, + ), + ( + ( + ServerConnectionError(), + fast_forward(7 * 60), + mock_request_error(status=429), + fast_forward(8 * 60 - 1), + ServerConnectionError(), + ), + False, + ), + ( + ( + ServerConnectionError(), + fast_forward(7 * 60), + mock_request_error(status=429), + fast_forward(8 * 60), + ServerConnectionError(), + ), + False, + ), + ( + ( + ServerConnectionError(), + fast_forward(7 * 60), + mock_request_error(status=429), + fast_forward(8 * 60), + ServerConnectionError(), + fast_forward(15 * 60 - 1), + ServerConnectionError(), + ), + False, + ), + ( + ( + ServerConnectionError(), + fast_forward(7 * 60), + mock_request_error(status=429), + fast_forward(8 * 60), + ServerConnectionError(), + fast_forward(15 * 60), + ServerConnectionError(), + ), + True, + ), + ) + ), + # Behaviors specific to the default retry policy + *( + (zyte_api_retrying, outcomes, exhausted) + for outcomes, exhausted in ( + # 
Temporary download errors are retried until they have + # happened 4 times in total. + ( + (mock_request_error(status=520),) * 3, + False, + ), + ( + (mock_request_error(status=520),) * 4, + True, + ), + ( + ( + *(mock_request_error(status=429),) * 2, + mock_request_error(status=520), + ), + False, + ), + ( + ( + *(mock_request_error(status=429),) * 3, + mock_request_error(status=520), + ), + False, + ), + ( + ( + *( + mock_request_error(status=429), + mock_request_error(status=520), + ) + * 3, + ), + False, + ), + ( + ( + *( + mock_request_error(status=429), + mock_request_error(status=520), + ) + * 4, + ), + True, + ), + ) + ), + # Behaviors specific to the aggressive retry policy + *( + (aggressive_retrying, outcomes, exhausted) + for outcomes, exhausted in ( + # Temporary download errors are retried until they have + # happened 8 times in total. Permanent download errors also + # count towards that limit. + ( + (mock_request_error(status=520),) * 7, + False, + ), + ( + (mock_request_error(status=520),) * 8, + True, + ), + ( + ( + *(mock_request_error(status=429),) * 6, + mock_request_error(status=520), + ), + False, + ), + ( + ( + *(mock_request_error(status=429),) * 7, + mock_request_error(status=520), + ), + False, + ), + ( + ( + *( + mock_request_error(status=429), + mock_request_error(status=520), + ) + * 7, + ), + False, + ), + ( + ( + *( + mock_request_error(status=429), + mock_request_error(status=520), + ) + * 8, + ), + True, + ), + ( + ( + *(mock_request_error(status=520),) * 5, + *(mock_request_error(status=521),) * 1, + *(mock_request_error(status=520),) * 1, + ), + False, + ), + ( + ( + *(mock_request_error(status=520),) * 6, + *(mock_request_error(status=521),) * 1, + *(mock_request_error(status=520),) * 1, + ), + True, + ), + ( + ( + *(mock_request_error(status=520),) * 6, + *(mock_request_error(status=521),) * 1, + ), + False, + ), + ( + ( + *(mock_request_error(status=520),) * 7, + *(mock_request_error(status=521),) * 1, + ), + True, + ), + # 
Permanent download errors are retried until they have + # happened 4 times in total. + ( + (*(mock_request_error(status=521),) * 3,), + False, + ), + ( + (*(mock_request_error(status=521),) * 4,), + True, + ), + # Undocumented 5xx errors are retried up to 3 times. + *( + scenario + for status in ( + 500, + 502, + 504, + ) + for scenario in ( + ( + (*(mock_request_error(status=status),) * 3,), + False, + ), + ( + (*(mock_request_error(status=status),) * 4,), + True, + ), + ( + ( + *(mock_request_error(status=status),) * 2, + mock_request_error(status=429), + mock_request_error(status=503), + ServerConnectionError(), + mock_request_error(status=status), + ), + False, + ), + ( + ( + *(mock_request_error(status=status),) * 3, + mock_request_error(status=429), + mock_request_error(status=503), + ServerConnectionError(), + mock_request_error(status=status), + ), + True, + ), + ( + ( + mock_request_error(status=status), + mock_request_error(status=555), + mock_request_error(status=status), + ), + False, + ), + ( + ( + mock_request_error(status=status), + mock_request_error(status=555), + *(mock_request_error(status=status),) * 2, + ), + True, + ), + ) + ), + ) + ), + ), +) +@pytest.mark.asyncio +@patch("time.monotonic") +async def test_retry_stop(monotonic_mock, retrying, outcomes, exhausted): + monotonic_mock.return_value = 0 + last_outcome = outcomes[-1] + outcomes = deque(outcomes) + + def wait(retry_state): + return 0.0 + + retrying = copy(retrying) + retrying.wait = wait + + async def run(): + while True: + try: + outcome = outcomes.popleft() + except IndexError: + return + else: + if isinstance(outcome, fast_forward): + monotonic_mock.return_value += outcome.time + continue + raise outcome + + run = retrying.wraps(run) + try: + await run() + except Exception as outcome: + assert exhausted + assert outcome is last_outcome + else: + assert not exhausted diff --git a/zyte_api/__init__.py b/zyte_api/__init__.py index 347c509..1f97fd2 100644 --- a/zyte_api/__init__.py 
+++ b/zyte_api/__init__.py @@ -4,11 +4,21 @@ from ._async import AsyncZyteAPI from ._errors import RequestError -from ._retry import RetryFactory +from ._retry import AggressiveRetryFactory, RetryFactory +from ._retry import aggressive_retrying as _aggressive_retrying +from ._retry import ( + stop_after_uninterrupted_delay, + stop_on_count, + stop_on_download_error, +) from ._retry import zyte_api_retrying as _zyte_api_retrying from ._sync import ZyteAPI from .errors import ParsedError -# We re-define the variable here for Sphinx to pick the documentation. +# We re-define the variables here for Sphinx to pick the documentation. + #: :ref:`Default retry policy `. zyte_api_retrying = _zyte_api_retrying + +#: :ref:`Aggressive retry policy <aggressive-retry-policy>`. +aggressive_retrying = _aggressive_retrying diff --git a/zyte_api/_async.py b/zyte_api/_async.py index 630133e..261277f 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -91,6 +91,11 @@ def __init__( retrying: Optional[AsyncRetrying] = None, user_agent: Optional[str] = None, ): + if retrying is not None and not isinstance(retrying, AsyncRetrying): + raise ValueError( + "The retrying parameter, if defined, must be an instance of " + "AsyncRetrying." 
+ ) self.api_key = get_apikey(api_key) self.api_url = api_url self.n_conn = n_conn diff --git a/zyte_api/_retry.py b/zyte_api/_retry.py index bd169b6..04b16b1 100644 --- a/zyte_api/_retry.py +++ b/zyte_api/_retry.py @@ -1,5 +1,9 @@ import asyncio import logging +from collections import Counter +from datetime import timedelta +from itertools import count +from typing import Union from aiohttp import client_exceptions from tenacity import ( @@ -10,19 +14,19 @@ before_sleep_log, retry_base, retry_if_exception, - stop_after_attempt, - stop_after_delay, wait_chain, wait_fixed, wait_random, wait_random_exponential, ) -from tenacity.stop import stop_never +from tenacity.stop import stop_base, stop_never from ._errors import RequestError logger = logging.getLogger(__name__) +_IDS = count() + _NETWORK_ERRORS = ( asyncio.TimeoutError, # could happen while reading the response body @@ -54,59 +58,100 @@ def _is_temporary_download_error(exc: BaseException) -> bool: return isinstance(exc, RequestError) and exc.status == 520 -class RetryFactory: - """Factory class that builds the :class:`tenacity.AsyncRetrying` object - that defines the :ref:`default retry policy `. +class stop_on_count(stop_base): + """Keep a call count with the specified counter name, and stop after the + specified number of calls. - To create a custom retry policy, you can subclass this factory class, - modify it as needed, and then call :meth:`build` on your subclass to get - the corresponding :class:`tenacity.AsyncRetrying` object. + Unlike stop_after_attempt, this callable does not take into account + attempts for which a different stop callable was used. + """ - For example, to increase the maximum number of attempts for :ref:`temporary - download errors ` from 4 (i.e. 3 - retries) to 10 (i.e. 9 retries): + def __init__(self, max_count: int) -> None: + self._max_count = max_count + self._counter_id = next(_IDS) - .. 
code-block:: python + def __call__(self, retry_state: "RetryCallState") -> bool: + if not hasattr(retry_state, "counter"): + retry_state.counter = Counter() # type: ignore + retry_state.counter[self._counter_id] += 1 # type: ignore + if retry_state.counter[self._counter_id] >= self._max_count: # type: ignore + return True + return False - from tenacity import stop_after_attempt - from zyte_api import RetryFactory +time_unit_type = Union[int, float, timedelta] - class CustomRetryFactory(RetryFactory): - temporary_download_error_stop = stop_after_attempt(10) +def to_seconds(time_unit: time_unit_type) -> float: + return float( + time_unit.total_seconds() if isinstance(time_unit, timedelta) else time_unit + ) - CUSTOM_RETRY_POLICY = CustomRetryFactory().build() - To retry :ref:`permanent download errors - `, treating them the same as - :ref:`temporary download errors `: +class stop_after_uninterrupted_delay(stop_base): + """Stop when this stop callable has been called for the specified time + uninterrupted, i.e. without calls to different stop callables. - .. code-block:: python + Unlike stop_after_delay, this callable resets its timer after any attempt + for which a different stop callable was used. + """ - from tenacity import RetryCallState, retry_if_exception, stop_after_attempt - from zyte_api import RequestError, RetryFactory + def __init__(self, max_delay: time_unit_type) -> None: + self._max_delay = to_seconds(max_delay) + self._timer_id = next(_IDS) + + def __call__(self, retry_state: "RetryCallState") -> bool: + if not hasattr(retry_state, "uninterrupted_start_times"): + retry_state.uninterrupted_start_times = {} # type: ignore + if self._timer_id not in retry_state.uninterrupted_start_times: # type: ignore + # First time. 
+ retry_state.uninterrupted_start_times[self._timer_id] = [ # type: ignore + retry_state.attempt_number, + retry_state.outcome_timestamp, + ] + return False + attempt_number, start_time = retry_state.uninterrupted_start_times[ # type: ignore + self._timer_id + ] + if retry_state.attempt_number - attempt_number > 1: + # There was a different stop reason since the last attempt, + # resetting the timer. + retry_state.uninterrupted_start_times[self._timer_id] = [ # type: ignore + retry_state.attempt_number, + retry_state.outcome_timestamp, + ] + return False + if retry_state.outcome_timestamp - start_time < self._max_delay: + # Within time, do not stop, only increase the attempt count. + retry_state.uninterrupted_start_times[self._timer_id][0] += 1 # type: ignore + return False + return True - def is_permanent_download_error(exc: BaseException) -> bool: - return isinstance(exc, RequestError) and exc.status == 521 +class RetryFactory: + """Factory class that builds the :class:`tenacity.AsyncRetrying` object + that defines the :ref:`default retry policy `. + To create a custom retry policy, you can subclass this factory class, + modify it as needed, and then call :meth:`build` on your subclass to get + the corresponding :class:`tenacity.AsyncRetrying` object. - class CustomRetryFactory(RetryFactory): + For example, to double the number of attempts for :ref:`temporary + download errors ` and the time network + errors are retried: - retry_condition = RetryFactory.retry_condition | retry_if_exception( - is_permanent_download_error - ) + .. 
code-block:: python + + from zyte_api import ( + RetryFactory, + stop_after_uninterrupted_delay, + stop_on_count, + ) - def wait(self, retry_state: RetryCallState) -> float: - if is_permanent_download_error(retry_state.outcome.exception()): - return self.temporary_download_error_wait(retry_state=retry_state) - return super().wait(retry_state) - def stop(self, retry_state: RetryCallState) -> bool: - if is_permanent_download_error(retry_state.outcome.exception()): - return self.temporary_download_error_stop(retry_state) - return super().stop(retry_state) + class CustomRetryFactory(RetryFactory): + network_error_stop = stop_after_uninterrupted_delay(30 * 60) + temporary_download_error_stop = stop_on_count(8) CUSTOM_RETRY_POLICY = CustomRetryFactory().build() @@ -136,8 +181,8 @@ def stop(self, retry_state: RetryCallState) -> bool: ) temporary_download_error_wait = network_error_wait throttling_stop = stop_never - network_error_stop = stop_after_delay(15 * 60) - temporary_download_error_stop = stop_after_attempt(4) + network_error_stop = stop_after_uninterrupted_delay(15 * 60) + temporary_download_error_stop = stop_on_count(4) def wait(self, retry_state: RetryCallState) -> float: assert retry_state.outcome, "Unexpected empty outcome" @@ -177,3 +222,105 @@ def build(self) -> AsyncRetrying: zyte_api_retrying: AsyncRetrying = RetryFactory().build() + + +def _download_error(exc: BaseException) -> bool: + return isinstance(exc, RequestError) and exc.status in {520, 521} + + +def _undocumented_error(exc: BaseException) -> bool: + return ( + isinstance(exc, RequestError) + and exc.status >= 500 + and exc.status not in {503, 520, 521} + ) + + +class stop_on_download_error(stop_base): + """Stop after the specified max numbers of total or permanent download + errors.""" + + def __init__(self, max_total: int, max_permanent: int) -> None: + self._max_total = max_total + self._max_permanent = max_permanent + + def __call__(self, retry_state: "RetryCallState") -> bool: + if not 
hasattr(retry_state, "counter"): + retry_state.counter = Counter() # type: ignore + assert retry_state.outcome, "Unexpected empty outcome" + exc = retry_state.outcome.exception() + assert exc, "Unexpected empty exception" + if exc.status == 521: # type: ignore + retry_state.counter["permanent_download_error"] += 1 # type: ignore + if retry_state.counter["permanent_download_error"] >= self._max_permanent: # type: ignore + return True + retry_state.counter["download_error"] += 1 # type: ignore + if retry_state.counter["download_error"] >= self._max_total: # type: ignore + return True + return False + + +class AggressiveRetryFactory(RetryFactory): + """Factory class that builds the :class:`tenacity.AsyncRetrying` object + that defines the :ref:`aggressive retry policy `. + + To create a custom retry policy, you can subclass this factory class, + modify it as needed, and then call :meth:`build` on your subclass to get + the corresponding :class:`tenacity.AsyncRetrying` object. + + For example, to double the maximum number of attempts for all error + responses and double the time network errors are retried: + + .. 
code-block:: python + + from zyte_api import ( + AggressiveRetryFactory, + stop_after_uninterrupted_delay, + stop_on_count, + stop_on_download_error, + ) + + + class CustomRetryFactory(AggressiveRetryFactory): + download_error_stop = stop_on_download_error(max_total=16, max_permanent=8) + network_error_stop = stop_after_uninterrupted_delay(30 * 60) + undocumented_error_stop = stop_on_count(8) + + + CUSTOM_RETRY_POLICY = CustomRetryFactory().build() + """ + + retry_condition = ( + RetryFactory.retry_condition + | retry_if_exception(_download_error) + | retry_if_exception(_undocumented_error) + ) + + download_error_stop = stop_on_download_error(max_total=8, max_permanent=4) + download_error_wait = RetryFactory.temporary_download_error_wait + + undocumented_error_stop = stop_on_count(4) + undocumented_error_wait = RetryFactory.temporary_download_error_wait + + def stop(self, retry_state: RetryCallState) -> bool: + assert retry_state.outcome, "Unexpected empty outcome" + exc = retry_state.outcome.exception() + assert exc, "Unexpected empty exception" + if _download_error(exc): + return self.download_error_stop(retry_state) + if _undocumented_error(exc): + return self.undocumented_error_stop(retry_state) + return super().stop(retry_state) + + def wait(self, retry_state: RetryCallState) -> float: + assert retry_state.outcome, "Unexpected empty outcome" + exc = retry_state.outcome.exception() + assert exc, "Unexpected empty exception" + if _download_error(exc): + return self.download_error_wait(retry_state) + if _undocumented_error(exc): + return self.undocumented_error_wait(retry_state=retry_state) + return super().wait(retry_state) + + +aggressive_retrying = AggressiveRetryFactory().build()