Refactored retry config into _retry.py and added support for exponential backoff and Retry-After header #871
base: fcm-http2
Changes from all commits
4c3a6d9
1c4c844
b3aba37
0b976cb
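Before diving into the diffs, a quick orientation: the sketch below shows one way the new retry transport could be wired into an httpx.AsyncClient. The HttpxRetry arguments mirror the ones defined in the new firebase_admin/_retry.py module in this PR, but the client wiring itself is illustrative only, not code from this PR.

```python
# Illustrative sketch only (not code from this PR): wiring HttpxRetryTransport
# into an httpx.AsyncClient with a custom HttpxRetry configuration.
import httpx
from firebase_admin._retry import HttpxRetry, HttpxRetryTransport

retry = HttpxRetry(
    max_retries=4,
    status_forcelist=[500, 503],
    backoff_factor=0.5,
    backoff_jitter=0.5,
    respect_retry_after_header=True,
)
client = httpx.AsyncClient(transport=HttpxRetryTransport(retry=retry))
```

With this configuration, responses whose status is in the forcelist (and, when respect_retry_after_header is set, 413/429/503 responses carrying a Retry-After header) are retried with exponential backoff, as implemented in the module below.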
firebase_admin/_retry.py
@@ -0,0 +1,224 @@
# Copyright 2025 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Internal retry logic module

This module provides utilities for adding retry logic to HTTPX requests
"""

from __future__ import annotations
import copy
import email.utils
import random
import re
import time
from types import CoroutineType
from typing import Any, Callable, List, Optional, Tuple
import logging
import asyncio
import httpx

logger = logging.getLogger(__name__)


class HttpxRetry:
    """HTTPX based retry config"""
    # Status codes to be used for respecting `Retry-After` header
    RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])

    # Default maximum backoff time.
    DEFAULT_BACKOFF_MAX = 120

    def __init__(
            self,
            max_retries: int = 10,
            status_forcelist: Optional[List[int]] = None,
            backoff_factor: float = 0,
            backoff_max: float = DEFAULT_BACKOFF_MAX,
            backoff_jitter: float = 0,
            history: Optional[List[Tuple[
                httpx.Request,
                Optional[httpx.Response],
                Optional[Exception]
            ]]] = None,
            respect_retry_after_header: bool = False,
    ) -> None:
        self.retries_left = max_retries
        self.status_forcelist = status_forcelist
        self.backoff_factor = backoff_factor
        self.backoff_max = backoff_max
        self.backoff_jitter = backoff_jitter
        if history:
            self.history = history
        else:
            self.history = []
        self.respect_retry_after_header = respect_retry_after_header

    def copy(self) -> HttpxRetry:
        """Creates a deep copy of this instance."""
        return copy.deepcopy(self)

    def is_retryable_response(self, response: httpx.Response) -> bool:
        """Determine if a response implies that the request should be retried if possible."""
        if self.status_forcelist and response.status_code in self.status_forcelist:
            return True

        has_retry_after = bool(response.headers.get("Retry-After"))
        if (
                self.respect_retry_after_header
                and has_retry_after
                and response.status_code in self.RETRY_AFTER_STATUS_CODES
        ):
            return True

        return False

    def is_exhausted(self) -> bool:
        """Determine whether the retries have been exhausted."""
        # retries_left is negative
        return self.retries_left < 0

    # Identical implementation of `urllib3.Retry.parse_retry_after()`
    def _parse_retry_after(self, retry_after_header: str) -> float | None:
        """Parses Retry-After string into a float with unit seconds."""
        seconds: float
        # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
        if re.match(r"^\s*[0-9]+\s*$", retry_after_header):
            seconds = int(retry_after_header)
        else:
            retry_date_tuple = email.utils.parsedate_tz(retry_after_header)
            if retry_date_tuple is None:
                raise httpx.RemoteProtocolError(f"Invalid Retry-After header: {retry_after_header}")

            retry_date = email.utils.mktime_tz(retry_date_tuple)
            seconds = retry_date - time.time()

        seconds = max(seconds, 0)

        return seconds

    def get_retry_after(self, response: httpx.Response) -> float | None:
        """Determine the Retry-After time needed before sending the next request."""
        retry_after_header = response.headers.get('Retry-After', None)
        if retry_after_header:
            # Convert retry header to a float in seconds
            return self._parse_retry_after(retry_after_header)
        return None

    def get_backoff_time(self):
        """Determine the backoff time needed before sending the next request."""
        # attempt_count is the number of previous request attempts
        attempt_count = len(self.history)
        # Backoff should be set to 0 until after first retry.
        if attempt_count <= 1:
            return 0
        backoff = self.backoff_factor * (2 ** (attempt_count - 1))
        if self.backoff_jitter:
            backoff += random.random() * self.backoff_jitter
        return float(max(0, min(self.backoff_max, backoff)))

    async def sleep_for_backoff(self) -> None:
        """Determine and wait the backoff time needed before sending the next request."""
        backoff = self.get_backoff_time()
        logger.debug('Sleeping for backoff of %f seconds following failed request', backoff)
        await asyncio.sleep(backoff)

    async def sleep(self, response: httpx.Response) -> None:
        """Determine and wait the time needed before sending the next request."""
        if self.respect_retry_after_header:
            retry_after = self.get_retry_after(response)
            if retry_after:
                logger.debug(
                    'Sleeping for Retry-After header of %f seconds following failed request',
                    retry_after
                )
                await asyncio.sleep(retry_after)
                return
        await self.sleep_for_backoff()

    def increment(
            self,
            request: httpx.Request,
            response: Optional[httpx.Response] = None,
            error: Optional[Exception] = None
    ) -> None:
        """Update the retry state based on request attempt."""
        self.retries_left -= 1
        self.history.append((request, response, error))


class HttpxRetryTransport(httpx.AsyncBaseTransport):
    """HTTPX transport with retry logic."""

    DEFAULT_RETRY = HttpxRetry(max_retries=4, status_forcelist=[500, 503], backoff_factor=0.5)

    def __init__(self, retry: HttpxRetry = DEFAULT_RETRY, **kwargs) -> None:
        self._retry = retry

        transport_kwargs = kwargs.copy()
        transport_kwargs.update({'retries': 0, 'http2': True})
        # We use a full AsyncHTTPTransport under the hood that is already
        # set up to handle requests. We also ensure that the transport's internal
        # retries are not allowed.
        self._wrapped_transport = httpx.AsyncHTTPTransport(**transport_kwargs)

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        return await self._dispatch_with_retry(
            request, self._wrapped_transport.handle_async_request)

    async def _dispatch_with_retry(
            self,
            request: httpx.Request,
            dispatch_method: Callable[[httpx.Request], CoroutineType[Any, Any, httpx.Response]]
    ) -> httpx.Response:
"""Sends a request with retry logic using a provided dispatch method.""" | ||
# This request config is used across all requests that use this transport and therefore | ||
# needs to be copied to be used for just this request and it's retries. | ||
retry = self._retry.copy() | ||
# First request | ||
response, error = None, None | ||
|
||
        while not retry.is_exhausted():

            # First retry
            if response:
                await retry.sleep(response)

            # Need to reset here so only last attempt's error or response is saved.
            response, error = None, None

            try:
                logger.debug('Sending request in _dispatch_with_retry(): %r', request)
                response = await dispatch_method(request)
                logger.debug('Received response: %r', response)
            except httpx.HTTPError as err:
                logger.debug('Received error: %r', err)
                error = err

            if response and not retry.is_retryable_response(response):
                return response

            if error:
                raise error

            retry.increment(request, response, error)

        if response:
            return response
        if error:
            raise error
        raise AssertionError('_dispatch_with_retry() ended with no response or exception')

    async def aclose(self) -> None:
        await self._wrapped_transport.aclose()
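As a side note on the backoff math above: get_backoff_time() returns 0 until after the first retry, then backoff_factor * 2 ** (attempt_count - 1) seconds (plus optional jitter), capped at backoff_max. The snippet below is a small illustrative sketch, not part of the diff, assuming the DEFAULT_RETRY settings of backoff_factor=0.5 with no jitter.

```python
# Sketch of the delay schedule implied by HttpxRetry.get_backoff_time() above,
# assuming backoff_factor=0.5, backoff_jitter=0, backoff_max=120.
backoff_factor, backoff_max = 0.5, 120

for attempt_count in range(1, 6):  # attempt_count == len(retry.history)
    if attempt_count <= 1:
        backoff = 0.0
    else:
        backoff = min(backoff_max, backoff_factor * (2 ** (attempt_count - 1)))
    print(f'after attempt {attempt_count}: sleep {backoff:.1f}s')
# Prints sleeps of 0.0, 1.0, 2.0, 4.0, 8.0 seconds
```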
firebase_admin/messaging.py
@@ -20,6 +20,7 @@
import json
import warnings
import asyncio
import logging
import requests
import httpx

@@ -38,7 +39,9 @@
    exceptions,
    App
)
from firebase_admin._retry import HttpxRetryTransport

logger = logging.getLogger(__name__)
Comment: Is there a way to set the log level in production code?

Reply: Developers should be able to set the logging level in their production apps by using the following code:

```python
import logging

logging.basicConfig()
firebase_admin_logger = logging.getLogger('firebase_admin')
firebase_admin_logger.setLevel(logging.DEBUG)
```

_MESSAGING_ATTRIBUTE = '_messaging'

@@ -376,15 +379,6 @@ def exception(self):
        """A ``FirebaseError`` if an error occurs while sending the message to the FCM service."""
        return self._exception

# Auth Flow
# TODO: Remove comments
# The aim here is to be able to get auth credentials right before the request is sent.
# This is similar to what is done in transport.requests.AuthorizedSession().
# We can then pass this in at the client level.

# Notes:
# - This implementations does not cover timeouts on requests sent to refresh credentials.
# - Uses HTTP/1 and a blocking credential for refreshing.
class GoogleAuthCredentialFlow(httpx.Auth):
    """Google Auth Credential Auth Flow"""
    def __init__(self, credential: credentials.Credentials):
@@ -410,6 +404,9 @@ def auth_flow(self, request: httpx.Request):
            # copy original headers
            request.headers = _original_headers.copy()
            # mutates request headers
            logger.debug(
                'Refreshing credentials for request attempt %d',
                _credential_refresh_attempt + 1)
            self.apply_auth_headers(request)

            # Continue to perform the request
@@ -420,6 +417,9 @@ def auth_flow(self, request: httpx.Request):
            # on refreshable status codes. Current transport.requests.AuthorizedSession()
            # only does this on 401 errors. We should do the same.
            if response.status_code in self._refresh_status_codes:
                logger.debug(
                    'Request attempt %d failed due to unauthorized credentials',
                    _credential_refresh_attempt + 1)
                _credential_refresh_attempt += 1
            else:
                break
@@ -670,11 +670,6 @@ def _handle_batch_error(self, error):
        return _gapic_utils.handle_platform_error_from_googleapiclient(
            error, _MessagingService._build_fcm_error_googleapiclient)

    # TODO: Remove comments
    # We should be careful to clean up the httpx clients.
    # Since we are using an async client we must also close in async. However we can sync wrap this.
    # The close method is called by the app on shutdown/clean-up of each service. We don't seem to
    # make use of this much elsewhere.
    def close(self) -> None:
        asyncio.run(self._async_client.aclose())
@@ -715,45 +710,3 @@ def _build_fcm_error(cls, error_dict) -> Optional[Callable[..., exceptions.Fireb
                fcm_code = detail.get('errorCode')
                break
        return _MessagingService.FCM_ERROR_TYPES.get(fcm_code) if fcm_code else None


# TODO: Remove comments
# Notes:
# This implementation currently only covers basic retires for pre-defined status errors
class HttpxRetryTransport(httpx.AsyncBaseTransport):
    """HTTPX transport with retry logic."""
    # We could also support passing kwargs here
    def __init__(self, **kwargs) -> None:
        # Hardcoded settings for now
        self._retryable_status_codes = (500, 503,)
        self._max_retry_count = 4

        # - We use a full AsyncHTTPTransport under the hood to make use of it's
        #   fully implemented `handle_async_request()`.
        # - We could consider making the `HttpxRetryTransport`` class extend a
        #   `AsyncHTTPTransport` instead and use the parent class's methods to handle
        #   requests.
        # - We should also ensure that that transport's internal retry is
        #   not enabled.
        transport_kwargs = kwargs.copy()
        transport_kwargs.update({'retries': 0, 'http2': True})
        self._wrapped_transport = httpx.AsyncHTTPTransport(**transport_kwargs)


    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        _retry_count = 0

        while True:
            # Dispatch request
            # Let exceptions pass through for now
            response = await self._wrapped_transport.handle_async_request(request)

            # Check if request is retryable
            if response.status_code in self._retryable_status_codes:
                _retry_count += 1

                # Return if retries exhausted
                if _retry_count > self._max_retry_count:
                    return response
            else:
                return response
Comment: Do we plan to keep these logs in the production code?

Reply: I would want to keep these for future debugging. This would be helpful to catch issues as we iterate. Wdyt?