3535 "minimal" : "low" ,
3636}
3737
38+ # ── Max output token limits per Anthropic model ───────────────────────
39+ # Source: Anthropic docs + Cline model catalog. Anthropic's API requires
40+ # max_tokens as a mandatory field. Previously we hardcoded 16384, which
41+ # starves thinking-enabled models (thinking tokens count toward the limit).
#
# Keys are matched as substrings of the lower-cased model ID by
# _get_anthropic_max_output(), and the longest matching key wins. A short
# family key such as "claude-opus-4" therefore also applies to any
# "claude-opus-4-*" ID that has no longer, more specific entry in this table;
# such an ID does NOT fall through to the default below.
42+ _ANTHROPIC_OUTPUT_LIMITS = {
43+ # Claude 4.6
44+ "claude-opus-4-6" : 128_000 ,
45+ "claude-sonnet-4-6" : 64_000 ,
46+ # Claude 4.5
47+ "claude-opus-4-5" : 64_000 ,
48+ "claude-sonnet-4-5" : 64_000 ,
49+ "claude-haiku-4-5" : 64_000 ,
50+ # Claude 4
51+ "claude-opus-4" : 32_000 ,
52+ "claude-sonnet-4" : 64_000 ,
53+ # Claude 3.7
# NOTE(review): 128K for 3.7 Sonnet is believed to require an extended-output
# beta header (64K otherwise) — confirm against Anthropic's model docs and
# the beta headers this client sends.
54+ "claude-3-7-sonnet" : 128_000 ,
55+ # Claude 3.5
56+ "claude-3-5-sonnet" : 8_192 ,
57+ "claude-3-5-haiku" : 8_192 ,
58+ # Claude 3
59+ "claude-3-opus" : 4_096 ,
60+ "claude-3-sonnet" : 4_096 ,
61+ "claude-3-haiku" : 4_096 ,
62+ }
63+
64+ # For any model not in the table, assume the highest current limit.
65+ # Future Anthropic models are unlikely to have *less* output capacity.
# This value is used only when no table key occurs inside the model ID.
66+ _ANTHROPIC_DEFAULT_OUTPUT_LIMIT = 128_000
67+
68+
def _get_anthropic_max_output(model: str) -> int:
    """Return the max output token limit to request for an Anthropic model.

    Every key of _ANTHROPIC_OUTPUT_LIMITS that occurs anywhere inside the
    lower-cased model ID is a candidate, so date-stamped IDs
    (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast) still
    resolve. The longest candidate wins, which keeps a specific key such as
    "claude-opus-4-6" ahead of the shorter "claude-opus-4". When no key
    matches, _ANTHROPIC_DEFAULT_OUTPUT_LIMIT is returned.
    """
    lowered = model.lower()
    candidates = (name for name in _ANTHROPIC_OUTPUT_LIMITS if name in lowered)
    # max() keeps the first of several equally long candidates, i.e. the one
    # that appears earliest in the table.
    winner = max(candidates, key=len, default="")
    if not winner:
        return _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
    return _ANTHROPIC_OUTPUT_LIMITS[winner]
85+
3886
3987def _supports_adaptive_thinking (model : str ) -> bool :
4088 """Return True for Claude 4.6 models that support adaptive thinking."""
@@ -59,6 +107,7 @@ def _supports_adaptive_thinking(model: str) -> bool:
59107# The version must stay reasonably current — Anthropic rejects OAuth requests
60108# when the spoofed user-agent version is too far behind the actual release.
61109_CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
110+ _claude_code_version_cache : Optional [str ] = None
62111
63112
64113def _detect_claude_code_version () -> str :
@@ -86,11 +135,18 @@ def _detect_claude_code_version() -> str:
86135 return _CLAUDE_CODE_VERSION_FALLBACK
87136
88137
89- _CLAUDE_CODE_VERSION = _detect_claude_code_version ()
90138_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
91139_MCP_TOOL_PREFIX = "mcp_"
92140
93141
def _get_claude_code_version() -> str:
    """Return the Claude Code version, detecting it on first use.

    The value from _detect_claude_code_version() is stored in the
    module-level _claude_code_version_cache and reused by later calls, so
    detection is deferred until a caller actually needs the version string
    instead of running at import time.
    """
    global _claude_code_version_cache
    if _claude_code_version_cache is not None:
        return _claude_code_version_cache
    # First request: run detection and remember the result.
    _claude_code_version_cache = _detect_claude_code_version()
    return _claude_code_version_cache
148+
149+
94150def _is_oauth_token (key : str ) -> bool :
95151 """Check if the key is an OAuth/setup token (not a regular Console API key).
96152
@@ -132,7 +188,7 @@ def build_anthropic_client(api_key: str, base_url: str = None):
132188 kwargs ["auth_token" ] = api_key
133189 kwargs ["default_headers" ] = {
134190 "anthropic-beta" : "," .join (all_betas ),
135- "user-agent" : f"claude-cli/{ _CLAUDE_CODE_VERSION } (external, cli)" ,
191+ "user-agent" : f"claude-cli/{ _get_claude_code_version () } (external, cli)" ,
136192 "x-app" : "cli" ,
137193 }
138194 else :
@@ -241,7 +297,7 @@ def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
241297
242298 headers = {
243299 "Content-Type" : "application/json" ,
244- "User-Agent" : f"claude-cli/{ _CLAUDE_CODE_VERSION } (external, cli)" ,
300+ "User-Agent" : f"claude-cli/{ _get_claude_code_version () } (external, cli)" ,
245301 }
246302
247303 for endpoint in token_endpoints :
@@ -810,9 +866,15 @@ def build_anthropic_kwargs(
810866 tool_choice : Optional [str ] = None ,
811867 is_oauth : bool = False ,
812868 preserve_dots : bool = False ,
869+ context_length : Optional [int ] = None ,
813870) -> Dict [str , Any ]:
814871 """Build kwargs for anthropic.messages.create().
815872
873+ When *max_tokens* is None, the model's native output limit is used
874+ (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). If *context_length*
875+ is provided, the effective limit is clamped so it doesn't exceed
876+ the context window.
877+
816878 When *is_oauth* is True, applies Claude Code compatibility transforms:
817879 system prompt prefix, tool name prefixing, and prompt sanitization.
818880
@@ -823,7 +885,12 @@ def build_anthropic_kwargs(
823885 anthropic_tools = convert_tools_to_anthropic (tools ) if tools else []
824886
825887 model = normalize_model_name (model , preserve_dots = preserve_dots )
826- effective_max_tokens = max_tokens or 16384
888+ effective_max_tokens = max_tokens or _get_anthropic_max_output (model )
889+
890+ # Clamp to context window if the user set a lower context_length
891+ # (e.g. custom endpoint with limited capacity).
892+ if context_length and effective_max_tokens > context_length :
893+ effective_max_tokens = max (context_length - 1 , 1 )
827894
828895 # ── OAuth: Claude Code identity ──────────────────────────────────
829896 if is_oauth :
0 commit comments