From d5430a46b7df918d5751e50e1b74601d74556d92 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Wed, 29 Jan 2025 09:59:25 -0600 Subject: [PATCH 1/2] Add configurable retry behavior with reasonable defaults for count, wait, backoff, and jitter This makes retry behavior consistent between sync and async clients, and in its default configuration will get users past network interruptions. --- indico/config/config.py | 9 +++ indico/http/client.py | 19 ++++- indico/http/retry.py | 132 +++++++++++++++++++--------------- tests/unit/http/test_retry.py | 73 +++++++++++++++++++ 4 files changed, 173 insertions(+), 60 deletions(-) create mode 100644 tests/unit/http/test_retry.py diff --git a/indico/config/config.py b/indico/config/config.py index 1f253133..eed029ab 100644 --- a/indico/config/config.py +++ b/indico/config/config.py @@ -16,6 +16,10 @@ class IndicoConfig: api_token= (str, optional): The actual text of the API Token. Takes precedence over api_token_path verify_ssl= (bool, optional): Whether to verify the host's SSL certificate. Default=True requests_params= (dict, optional): Dictionary of requests. Session parameters to set + retry_count= (int, optional): Retry API calls this many times. + retry_wait= (float, optional): Wait this many seconds after the first error before retrying. + retry_backoff= (float, optional): Multiply the wait time by this amount for each additional error. + retry_jitter= (float, optional): Add a random amount of time (up to this percent as a decimal) to the wait time to prevent simultaneous retries. Returns: IndicoConfig object @@ -33,6 +37,11 @@ class IndicoConfig: requests_params: dict = None _disable_cookie_domain: bool = False + retry_count: int = 4 + retry_wait: float = 1 + retry_backoff: float = 4 + retry_jitter: float = 0.5 + def __init__(self, **kwargs): self.host: str = os.getenv("INDICO_HOST") diff --git a/indico/http/client.py b/indico/http/client.py index 0e67dfbc..5328a97e 100644 --- a/indico/http/client.py +++ b/indico/http/client.py @@ -18,7 +18,7 @@ ) from indico.http.serialization import aio_deserialize, deserialize -from .retry import aioretry +from .retry import retry logger = logging.getLogger(__file__) @@ -41,6 +41,14 @@ class HTTPClient: def __init__(self, config: IndicoConfig = None): self.config = config self.base_url = f"{self.config.protocol}://{self.config.host}" + self._decorate_with_retry = retry( + requests.RequestException, + count=self.config.retry_count, + wait=self.config.retry_wait, + backoff=self.config.retry_backoff, + jitter=self.config.retry_jitter, + ) + self._make_request = self._decorate_with_retry(self._make_request) self.request_session = requests.Session() if config and isinstance(config.requests_params, dict): @@ -210,6 +218,14 @@ def __init__(self, config: Optional[IndicoConfig] = None): """ self.config = config or IndicoConfig() self.base_url = f"{self.config.protocol}://{self.config.host}" + self._decorate_with_retry = retry( + aiohttp.ClientConnectionError, + count=self.config.retry_count, + wait=self.config.retry_wait, + backoff=self.config.retry_backoff, + jitter=self.config.retry_jitter, + ) + self._make_request = self._decorate_with_retry(self._make_request) self.request_session = aiohttp.ClientSession() if config and isinstance(config.requests_params, dict): @@ -282,7 +298,6 @@ def _handle_files(self, req_kwargs): if files: [f.close() for f in files] - @aioretry((aiohttp.ClientConnectionError, aiohttp.ServerDisconnectedError)) async def _make_request( self, method: str, diff --git a/indico/http/retry.py b/indico/http/retry.py index b9c9cd0d..b3843076 100644 --- a/indico/http/retry.py +++ b/indico/http/retry.py @@ -1,77 +1,93 @@ import asyncio -from random import randint import time -import typing as t from functools import wraps +from inspect import iscoroutinefunction +from random import random +from typing import TYPE_CHECKING, overload +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable + from typing import ParamSpec, TypeVar + + ArgumentsType = ParamSpec("ArgumentsType") + OuterReturnType = TypeVar("OuterReturnType") + InnerReturnType = TypeVar("InnerReturnType") def retry( - ExceptionTypes: t.Type[Exception], tries: int = 3, delay: int = 1, backoff: int = 2 -) -> t.Callable: + *errors: "type[Exception]", + count: int = 4, + wait: float = 1, + backoff: float = 4, + jitter: float = 0.5, +) -> "Callable[[Callable[ArgumentsType, OuterReturnType]], Callable[ArgumentsType, OuterReturnType]]": # noqa: E501 """ - Retry with exponential backoff + Decorate a function or coroutine to retry when it raises specified errors, + apply exponential backoff and jitter to the wait time, + and raise the last error if it retries too many times. + + By default, the decorated function or coroutine will be retried up to 4 times over + the course of ~2 minutes (waiting 1, 4, 16, and 64 seconds; plus up to 50% jitter). - Original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry + Arguments: + errors: Retry the function when it raises one of these errors. + count: Retry the function this many times. + wait: Wait this many seconds after the first error before retrying. + backoff: Multiply the wait time by this amount for each additional error. + jitter: Add a random amount of time (up to this percent as a decimal) + to the wait time to prevent simultaneous retries. """ - def retry_decorator(f: t.Callable) -> t.Any: - @wraps(f) - def retry_fn(*args: t.Any, **kwargs: t.Any) -> t.Any: - n_tries, n_delay = tries, delay - while n_tries > 1: - try: - return f(*args, **kwargs) - except ExceptionTypes: - time.sleep(n_delay) - n_tries -= 1 - n_delay *= backoff - return f(*args, **kwargs) + def wait_time(times_retried: int) -> float: + """ + Calculate the sleep time based on number of times retried. + """ + return wait * backoff**times_retried * (1 + jitter * random()) - return retry_fn + @overload + def retry_decorator( + decorated: "Callable[ArgumentsType, Awaitable[InnerReturnType]]", + ) -> "Callable[ArgumentsType, Awaitable[InnerReturnType]]": ... + @overload + def retry_decorator( + decorated: "Callable[ArgumentsType, InnerReturnType]", + ) -> "Callable[ArgumentsType, InnerReturnType]": ... + def retry_decorator( + decorated: "Callable[ArgumentsType, InnerReturnType]", + ) -> "Callable[ArgumentsType, Awaitable[InnerReturnType]] | Callable[ArgumentsType, InnerReturnType]": # noqa: E501 + """ + Decorate either a function or coroutine as appropriate. + """ + if iscoroutinefunction(decorated): + @wraps(decorated) + async def retrying_coroutine( # type: ignore[return] + *args: "ArgumentsType.args", **kwargs: "ArgumentsType.kwargs" + ) -> "InnerReturnType": + for times_retried in range(count + 1): + try: + return await decorated(*args, **kwargs) # type: ignore[no-any-return] + except errors: + if times_retried >= count: + raise - return retry_decorator + await asyncio.sleep(wait_time(times_retried)) -def aioretry( - ExceptionTypes: t.Type[Exception], - tries: int = 3, - delay: t.Union[int, t.Tuple[int, int]] = 1, - backoff: int = 2, - condition: t.Optional[t.Callable[[Exception], bool]] = None, -) -> t.Callable: - """ - Retry with exponential backoff + return retrying_coroutine - Original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry - Options: - condition: Callable to evaluate if an exception of a given type - is retryable for additional handling - delay: an initial time to wait (seconds). If a tuple, choose a random number - in that range to start. This can helps prevent retries at the exact - same time across multiple concurrent function calls - """ + else: + @wraps(decorated) + def retrying_function( # type: ignore[return] + *args: "ArgumentsType.args", **kwargs: "ArgumentsType.kwargs" + ) -> "InnerReturnType": + for times_retried in range(count + 1): + try: + return decorated(*args, **kwargs) + except errors: + if times_retried >= count: + raise - def retry_decorator(f: t.Callable) -> t.Callable: - @wraps(f) - async def retry_fn(*args: t.Any, **kwargs: t.Any) -> t.Any: - n_tries = tries - if isinstance(delay, tuple): - # pick a random number to sleep - n_delay = randint(*delay) - else: - n_delay = delay - while True: - try: - return await f(*args, **kwargs) - except ExceptionTypes as e: - if condition and not condition(e): - raise - await asyncio.sleep(n_delay) - n_tries -= 1 - n_delay *= backoff - if n_tries <= 0: - raise + time.sleep(wait_time(times_retried)) - return retry_fn + return retrying_function return retry_decorator diff --git a/tests/unit/http/test_retry.py b/tests/unit/http/test_retry.py new file mode 100644 index 00000000..c408b6a0 --- /dev/null +++ b/tests/unit/http/test_retry.py @@ -0,0 +1,73 @@ +import pytest + +from indico.http.retry import retry + + +def test_no_errors() -> None: + @retry(Exception) + def no_errors() -> bool: + return True + + assert no_errors() + + +def test_raises_errors() -> None: + calls = 0 + + @retry(RuntimeError, count=4, wait=0) + def raises_errors() -> None: + nonlocal calls + calls += 1 + raise RuntimeError() + + with pytest.raises(RuntimeError): + raises_errors() + + assert calls == 5 + + +def test_raises_other_errors() -> None: + calls = 0 + + @retry(RuntimeError, count=4, wait=0) + def raises_errors() -> None: + nonlocal calls + calls += 1 + raise ValueError() + + with pytest.raises(ValueError): + raises_errors() + + assert calls == 1 + + +@pytest.mark.asyncio +async def test_raises_errors_async() -> None: + calls = 0 + + @retry(RuntimeError, count=4, wait=0) + async def raises_errors() -> None: + nonlocal calls + calls += 1 + raise RuntimeError() + + with pytest.raises(RuntimeError): + await raises_errors() + + assert calls == 5 + + +@pytest.mark.asyncio +async def test_raises_other_errors_async() -> None: + calls = 0 + + @retry(RuntimeError, count=4, wait=0) + async def raises_errors() -> None: + nonlocal calls + calls += 1 + raise ValueError() + + with pytest.raises(ValueError): + await raises_errors() + + assert calls == 1 From d93eb77d45b16219384566c687041b98af7da403 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Wed, 29 Jan 2025 10:16:54 -0600 Subject: [PATCH 2/2] Make socket connect and read timeouts consistent between sync and async clients --- indico/http/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/indico/http/client.py b/indico/http/client.py index 5328a97e..04c9d2d6 100644 --- a/indico/http/client.py +++ b/indico/http/client.py @@ -158,6 +158,7 @@ def _make_request( f"{self.base_url}{path}", headers=headers, stream=True, + timeout=(4, 64), verify=False if not self.config.verify_ssl or not self.request_session.verify else True, @@ -326,6 +327,7 @@ async def _make_request( async with getattr(self.request_session, method)( f"{self.base_url}{path}", headers=headers, + timeout=aiohttp.ClientTimeout(sock_connect=4, sock_read=64), verify_ssl=self.config.verify_ssl, **request_kwargs, ) as response: