Skip to content

Commit 2189e3e

Browse files
committed
fix: standardize anthropic error handling
1 parent 03d6208 commit 2189e3e

2 files changed

Lines changed: 153 additions & 20 deletions

File tree

src/google/adk/models/anthropic_llm.py

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from typing import TYPE_CHECKING
3333
from typing import Union
3434

35+
import anthropic
3536
from anthropic import AsyncAnthropic
3637
from anthropic import AsyncAnthropicVertex
3738
from anthropic import NOT_GIVEN
@@ -52,6 +53,15 @@
5253

5354
logger = logging.getLogger("google_adk." + __name__)
5455

# Appended to the wrapped exception text so users see actionable guidance
# alongside the raw Anthropic rate limit error.
_RATE_LIMIT_POSSIBLE_FIX_MESSAGE = """
To mitigate rate limit errors, consider using a different model, reducing
request frequency, or upgrading your Anthropic API plan.
"""


class AnthropicRateLimitError(Exception):
  """Raised when the Anthropic API returns a rate limit error."""
5565

5666
@dataclasses.dataclass
5767
class _ToolUseAccumulator:
@@ -494,16 +504,21 @@ async def generate_content_async(
494504
thinking = _build_anthropic_thinking_param(llm_request.config)
495505

496506
if not stream:
497-
message = await self._anthropic_client.messages.create(
498-
model=model_to_use,
499-
system=llm_request.config.system_instruction,
500-
messages=messages,
501-
tools=tools,
502-
tool_choice=tool_choice,
503-
max_tokens=self.max_tokens,
504-
thinking=thinking,
505-
)
506-
yield message_to_generate_content_response(message)
507+
try:
508+
message = await self._anthropic_client.messages.create(
509+
model=model_to_use,
510+
system=llm_request.config.system_instruction,
511+
messages=messages,
512+
tools=tools,
513+
tool_choice=tool_choice,
514+
max_tokens=self.max_tokens,
515+
thinking=thinking,
516+
)
517+
yield message_to_generate_content_response(message)
518+
except anthropic.RateLimitError as e:
519+
raise AnthropicRateLimitError(
520+
f"{_RATE_LIMIT_POSSIBLE_FIX_MESSAGE}\n\n{e}"
521+
) from e
507522
else:
508523
async for response in self._generate_content_streaming(
509524
llm_request, messages, tools, tool_choice, thinking
@@ -528,16 +543,21 @@ async def _generate_content_streaming(
528543
a final aggregated LlmResponse with all content.
529544
"""
530545
model_to_use = self._resolve_model_name(llm_request.model)
531-
raw_stream = await self._anthropic_client.messages.create(
532-
model=model_to_use,
533-
system=llm_request.config.system_instruction,
534-
messages=messages,
535-
tools=tools,
536-
tool_choice=tool_choice,
537-
max_tokens=self.max_tokens,
538-
stream=True,
539-
thinking=thinking,
540-
)
546+
try:
547+
raw_stream = await self._anthropic_client.messages.create(
548+
model=model_to_use,
549+
system=llm_request.config.system_instruction,
550+
messages=messages,
551+
tools=tools,
552+
tool_choice=tool_choice,
553+
max_tokens=self.max_tokens,
554+
stream=True,
555+
thinking=thinking,
556+
)
557+
except anthropic.RateLimitError as e:
558+
raise AnthropicRateLimitError(
559+
f"{_RATE_LIMIT_POSSIBLE_FIX_MESSAGE}\n\n{e}"
560+
) from e
541561

542562
# Track content blocks being built during streaming.
543563
# Each entry maps a block index to its accumulated state.

tests/unittests/models/test_anthropic_llm.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,3 +1905,116 @@ async def test_streaming_redacted_thinking_block_preserved_in_final():
19051905

19061906
text_part = final.content.parts[1]
19071907
assert text_part.text == "Done."
1908+
1909+
1910+
# --- Tests for Anthropic API error handling ---
1911+
1912+
1913+
def _make_non_streaming_request():
  """Build a minimal LlmRequest suitable for exercising the error paths."""
  request_config = types.GenerateContentConfig(system_instruction="Test")
  user_turn = Content(role="user", parts=[Part.from_text(text="Hi")])
  return LlmRequest(
      model="claude-sonnet-4-20250514",
      contents=[user_turn],
      config=request_config,
  )
1919+
1920+
1921+
def _make_rate_limit_error():
  """Construct an anthropic.RateLimitError backed by a fake 429 response."""
  import anthropic

  fake_response = MagicMock()
  fake_response.status_code = 429
  fake_response.headers = {}
  error_body = {"error": {"message": "rate limit exceeded"}}
  return anthropic.RateLimitError(
      message="rate limit exceeded",
      response=fake_response,
      body=error_body,
  )
1932+
1933+
1934+
def _make_auth_error():
  """Construct an anthropic.AuthenticationError backed by a fake 401 response."""
  import anthropic

  fake_response = MagicMock()
  fake_response.status_code = 401
  fake_response.headers = {}
  error_body = {"error": {"message": "invalid api key"}}
  return anthropic.AuthenticationError(
      message="invalid api key",
      response=fake_response,
      body=error_body,
  )
1945+
1946+
1947+
@pytest.mark.asyncio
async def test_non_streaming_rate_limit_raises_anthropic_rate_limit_error():
  """A rate limit error surfaces as AnthropicRateLimitError (non-streaming)."""
  from google.adk.models.anthropic_llm import AnthropicRateLimitError

  llm = AnthropicLlm(model="claude-sonnet-4-20250514")
  mock_client = MagicMock()
  mock_client.messages.create = AsyncMock(side_effect=_make_rate_limit_error())

  with mock.patch.object(llm, "_anthropic_client", mock_client):
    with pytest.raises(AnthropicRateLimitError):
      # Drain the generator; the wrapped error is raised on first iteration.
      async for _ in llm.generate_content_async(
          _make_non_streaming_request(), stream=False
      ):
        pass
1964+
1965+
1966+
@pytest.mark.asyncio
async def test_streaming_rate_limit_raises_anthropic_rate_limit_error():
  """A rate limit error surfaces as AnthropicRateLimitError (streaming)."""
  from google.adk.models.anthropic_llm import AnthropicRateLimitError

  llm = AnthropicLlm(model="claude-sonnet-4-20250514")
  mock_client = MagicMock()
  mock_client.messages.create = AsyncMock(side_effect=_make_rate_limit_error())

  with mock.patch.object(llm, "_anthropic_client", mock_client):
    with pytest.raises(AnthropicRateLimitError):
      # Drain the generator; the wrapped error is raised on first iteration.
      async for _ in llm.generate_content_async(
          _make_non_streaming_request(), stream=True
      ):
        pass
1983+
1984+
1985+
@pytest.mark.asyncio
1986+
async def test_non_streaming_other_errors_propagate():
1987+
"""Non-rate-limit errors propagate unchanged."""
1988+
import anthropic
1989+
1990+
llm = AnthropicLlm(model="claude-sonnet-4-20250514")
1991+
mock_client = MagicMock()
1992+
mock_client.messages.create = AsyncMock(side_effect=_make_auth_error())
1993+
1994+
with mock.patch.object(llm, "_anthropic_client", mock_client):
1995+
with pytest.raises(anthropic.AuthenticationError):
1996+
_ = [
1997+
r
1998+
async for r in llm.generate_content_async(
1999+
_make_non_streaming_request(), stream=False
2000+
)
2001+
]
2002+
2003+
2004+
@pytest.mark.asyncio
async def test_streaming_other_errors_propagate():
  """Errors other than rate limits bubble up unchanged (streaming)."""
  import anthropic

  llm = AnthropicLlm(model="claude-sonnet-4-20250514")
  mock_client = MagicMock()
  mock_client.messages.create = AsyncMock(side_effect=_make_auth_error())

  with mock.patch.object(llm, "_anthropic_client", mock_client):
    # The original anthropic.AuthenticationError type must not be wrapped.
    with pytest.raises(anthropic.AuthenticationError):
      async for _ in llm.generate_content_async(
          _make_non_streaming_request(), stream=True
      ):
        pass

0 commit comments

Comments
 (0)