Commit 6624d1b

Merge branch 'NousResearch:main' into main

2 parents: 7ca008f + 9d4b3e5

74 files changed: 4713 additions & 543 deletions


.env.example

Lines changed: 9 additions & 0 deletions

@@ -59,6 +59,15 @@
 # OpenCode Go provides access to open models (GLM-5, Kimi K2.5, MiniMax M2.5)
 # $10/month subscription. Get your key at: https://opencode.ai/auth
 OPENCODE_GO_API_KEY=
+
+# =============================================================================
+# LLM PROVIDER (Hugging Face Inference Providers)
+# =============================================================================
+# Hugging Face routes to 20+ open models via unified OpenAI-compatible endpoint.
+# Free tier included ($0.10/month), no markup on provider rates.
+# Get your token at: https://huggingface.co/settings/tokens
+# Required permission: "Make calls to Inference Providers"
+HF_TOKEN=
 # OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1 # Override default base URL
 
 # =============================================================================
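
For readers wiring this up: Hugging Face Inference Providers exposes an OpenAI-compatible API, so HF_TOKEN typically drops into a standard client. A minimal sketch, assuming the router endpoint https://router.huggingface.co/v1 and the openai Python package (neither is pinned down by this diff):

import os
from openai import OpenAI

# Assumed HF router endpoint; confirm against the provider docs.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.environ["HF_TOKEN"],  # the token added to .env.example above
)

resp = client.chat.completions.create(
    model="moonshotai/Kimi-K2-Thinking",  # org/name ID, as in model_metadata.py below
    messages=[{"role": "user", "content": "Hello"}],
)
print(resp.choices[0].message.content)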

agent/anthropic_adapter.py

Lines changed: 71 additions & 4 deletions
@@ -35,6 +35,54 @@
     "minimal": "low",
 }
 
+# ── Max output token limits per Anthropic model ───────────────────────
+# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
+# max_tokens as a mandatory field. Previously we hardcoded 16384, which
+# starves thinking-enabled models (thinking tokens count toward the limit).
+_ANTHROPIC_OUTPUT_LIMITS = {
+    # Claude 4.6
+    "claude-opus-4-6": 128_000,
+    "claude-sonnet-4-6": 64_000,
+    # Claude 4.5
+    "claude-opus-4-5": 64_000,
+    "claude-sonnet-4-5": 64_000,
+    "claude-haiku-4-5": 64_000,
+    # Claude 4
+    "claude-opus-4": 32_000,
+    "claude-sonnet-4": 64_000,
+    # Claude 3.7
+    "claude-3-7-sonnet": 128_000,
+    # Claude 3.5
+    "claude-3-5-sonnet": 8_192,
+    "claude-3-5-haiku": 8_192,
+    # Claude 3
+    "claude-3-opus": 4_096,
+    "claude-3-sonnet": 4_096,
+    "claude-3-haiku": 4_096,
+}
+
+# For any model not in the table, assume the highest current limit.
+# Future Anthropic models are unlikely to have *less* output capacity.
+_ANTHROPIC_DEFAULT_OUTPUT_LIMIT = 128_000
+
+
+def _get_anthropic_max_output(model: str) -> int:
+    """Look up the max output token limit for an Anthropic model.
+
+    Uses substring matching against _ANTHROPIC_OUTPUT_LIMITS so date-stamped
+    model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
+    resolve correctly. Longest-prefix match wins to avoid e.g. "claude-3-5"
+    matching before "claude-3-5-sonnet".
+    """
+    m = model.lower()
+    best_key = ""
+    best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
+    for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
+        if key in m and len(key) > len(best_key):
+            best_key = key
+            best_val = val
+    return best_val
+
 
 def _supports_adaptive_thinking(model: str) -> bool:
     """Return True for Claude 4.6 models that support adaptive thinking."""
@@ -59,6 +107,7 @@ def _supports_adaptive_thinking(model: str) -> bool:
 # The version must stay reasonably current — Anthropic rejects OAuth requests
 # when the spoofed user-agent version is too far behind the actual release.
 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
+_claude_code_version_cache: Optional[str] = None
 
 
 def _detect_claude_code_version() -> str:
@@ -86,11 +135,18 @@ def _detect_claude_code_version() -> str:
     return _CLAUDE_CODE_VERSION_FALLBACK
 
 
-_CLAUDE_CODE_VERSION = _detect_claude_code_version()
 _CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
 _MCP_TOOL_PREFIX = "mcp_"
 
 
+def _get_claude_code_version() -> str:
+    """Lazily detect the installed Claude Code version when OAuth headers need it."""
+    global _claude_code_version_cache
+    if _claude_code_version_cache is None:
+        _claude_code_version_cache = _detect_claude_code_version()
+    return _claude_code_version_cache
+
+
 def _is_oauth_token(key: str) -> bool:
     """Check if the key is an OAuth/setup token (not a regular Console API key).
 
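
The net effect of this hunk and the previous one: importing the adapter no longer shells out to detect the Claude Code version; the probe runs on first OAuth use and is memoized in a module global. A self-contained sketch of the same lazy-memoization pattern (names here are illustrative, not from the commit):

from typing import Optional

_version_cache: Optional[str] = None  # module-level memo

def _detect_version() -> str:
    print("probing...")  # stand-in for the real subprocess probe
    return "2.1.74"

def get_version() -> str:
    """Lazy and memoized: the expensive probe runs at most once per process."""
    global _version_cache
    if _version_cache is None:
        _version_cache = _detect_version()
    return _version_cache

get_version()  # prints "probing...", returns "2.1.74"
get_version()  # cache hit: no probe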
@@ -132,7 +188,7 @@ def build_anthropic_client(api_key: str, base_url: str = None):
         kwargs["auth_token"] = api_key
         kwargs["default_headers"] = {
             "anthropic-beta": ",".join(all_betas),
-            "user-agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
             "x-app": "cli",
         }
     else:
@@ -241,7 +297,7 @@ def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
 
     headers = {
         "Content-Type": "application/json",
-        "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+        "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
     }
 
     for endpoint in token_endpoints:
@@ -810,9 +866,15 @@ def build_anthropic_kwargs(
     tool_choice: Optional[str] = None,
     is_oauth: bool = False,
     preserve_dots: bool = False,
+    context_length: Optional[int] = None,
 ) -> Dict[str, Any]:
     """Build kwargs for anthropic.messages.create().
 
+    When *max_tokens* is None, the model's native output limit is used
+    (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). If *context_length*
+    is provided, the effective limit is clamped so it doesn't exceed
+    the context window.
+
     When *is_oauth* is True, applies Claude Code compatibility transforms:
     system prompt prefix, tool name prefixing, and prompt sanitization.
 
@@ -823,7 +885,12 @@
     anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
 
     model = normalize_model_name(model, preserve_dots=preserve_dots)
-    effective_max_tokens = max_tokens or 16384
+    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
+
+    # Clamp to context window if the user set a lower context_length
+    # (e.g. custom endpoint with limited capacity).
+    if context_length and effective_max_tokens > context_length:
+        effective_max_tokens = max(context_length - 1, 1)
 
     # ── OAuth: Claude Code identity ──────────────────────────────────
     if is_oauth:
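
Putting the new table and clamp together, a minimal re-statement of the resolution logic for quick checking (assumes _get_anthropic_max_output from this diff; the helper name is mine):

def effective_max(model, max_tokens=None, context_length=None):
    eff = max_tokens or _get_anthropic_max_output(model)
    if context_length and eff > context_length:
        eff = max(context_length - 1, 1)
    return eff

effective_max("claude-opus-4-6")                         # 128_000 (native limit)
effective_max("claude-opus-4-6", context_length=32_768)  # 32_767 (clamped below the window)
effective_max("claude-sonnet-4-6", max_tokens=4_096)     # 4_096 (explicit value wins, still clamped)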

agent/auxiliary_client.py

Lines changed: 115 additions & 1 deletion
@@ -1137,7 +1137,13 @@ def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[
         return "custom", client, final_model
 
     if requested == "auto":
-        for candidate in get_available_vision_backends():
+        ordered = list(_VISION_AUTO_PROVIDER_ORDER)
+        preferred = _preferred_main_vision_provider()
+        if preferred in ordered:
+            ordered.remove(preferred)
+            ordered.insert(0, preferred)
+
+        for candidate in ordered:
             sync_client, default_model = _resolve_strict_vision_backend(candidate)
             if sync_client is not None:
                 return _finalize(candidate, sync_client, default_model)
@@ -1210,6 +1216,39 @@ def auxiliary_max_tokens_param(value: int) -> dict:
 _client_cache_lock = threading.Lock()
 
 
+def neuter_async_httpx_del() -> None:
+    """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.
+
+    The OpenAI SDK's ``AsyncHttpxClientWrapper.__del__`` schedules
+    ``self.aclose()`` via ``asyncio.get_running_loop().create_task()``.
+    When an ``AsyncOpenAI`` client is garbage-collected while
+    prompt_toolkit's event loop is running (the common CLI idle state),
+    the ``aclose()`` task runs on prompt_toolkit's loop but the
+    underlying TCP transport is bound to a *different* loop (the worker
+    thread's loop that the client was originally created on). If that
+    loop is closed or its thread is dead, the transport's
+    ``self._loop.call_soon()`` raises ``RuntimeError("Event loop is
+    closed")``, which prompt_toolkit surfaces as "Unhandled exception
+    in event loop ... Press ENTER to continue...".
+
+    Neutering ``__del__`` is safe because:
+    - Cached clients are explicitly cleaned via ``_force_close_async_httpx``
+      on stale-loop detection and ``shutdown_cached_clients`` on exit.
+    - Uncached clients' TCP connections are cleaned up by the OS when the
+      process exits.
+    - The OpenAI SDK itself marks this as a TODO (``# TODO(someday):
+      support non asyncio runtimes here``).
+
+    Call this once at CLI startup, before any ``AsyncOpenAI`` clients are
+    created.
+    """
+    try:
+        from openai._base_client import AsyncHttpxClientWrapper
+        AsyncHttpxClientWrapper.__del__ = lambda self: None  # type: ignore[assignment]
+    except (ImportError, AttributeError):
+        pass  # Graceful degradation if the SDK changes its internals
+
+
 def _force_close_async_httpx(client: Any) -> None:
     """Mark the httpx AsyncClient inside an AsyncOpenAI client as closed.
 
@@ -1257,6 +1296,25 @@ def shutdown_cached_clients() -> None:
         _client_cache.clear()
 
 
+def cleanup_stale_async_clients() -> None:
+    """Force-close cached async clients whose event loop is closed.
+
+    Call this after each agent turn to proactively clean up stale clients
+    before GC can trigger ``AsyncHttpxClientWrapper.__del__`` on them.
+    This is defense-in-depth — the primary fix is ``neuter_async_httpx_del``
+    which disables ``__del__`` entirely.
+    """
+    with _client_cache_lock:
+        stale_keys = []
+        for key, entry in _client_cache.items():
+            client, _default, cached_loop = entry
+            if cached_loop is not None and cached_loop.is_closed():
+                _force_close_async_httpx(client)
+                stale_keys.append(key)
+        for key in stale_keys:
+            del _client_cache[key]
+
+
 def _get_cached_client(
     provider: str,
     model: str = None,
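
Taken together, the three hooks give cached async clients a full lifecycle: patch __del__ once at startup, sweep stale clients after each turn, close everything on exit. A sketch of how a CLI entry point might wire them up (the turn loop is a stand-in; the three imported functions are from this file):

import atexit

from agent.auxiliary_client import (
    cleanup_stale_async_clients,
    neuter_async_httpx_del,
    shutdown_cached_clients,
)

def run_one_turn() -> None:
    ...  # stand-in for a real agent turn

def main() -> None:
    neuter_async_httpx_del()                  # once, before any AsyncOpenAI client exists
    atexit.register(shutdown_cached_clients)  # explicit close at process exit
    for _ in range(3):
        run_one_turn()
        cleanup_stale_async_clients()         # defense-in-depth after each turn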
@@ -1558,6 +1616,62 @@ def call_llm(
         raise
 
 
+def extract_content_or_reasoning(response) -> str:
+    """Extract content from an LLM response, falling back to reasoning fields.
+
+    Mirrors the main agent loop's behavior when a reasoning model (DeepSeek-R1,
+    Qwen-QwQ, etc.) returns ``content=None`` with reasoning in structured fields.
+
+    Resolution order:
+    1. ``message.content`` — strip inline think/reasoning blocks, check for
+       remaining non-whitespace text.
+    2. ``message.reasoning`` / ``message.reasoning_content`` — direct
+       structured reasoning fields (DeepSeek, Moonshot, Novita, etc.).
+    3. ``message.reasoning_details`` — OpenRouter unified array format.
+
+    Returns the best available text, or ``""`` if nothing found.
+    """
+    import re
+
+    msg = response.choices[0].message
+    content = (msg.content or "").strip()
+
+    if content:
+        # Strip inline think/reasoning blocks (mirrors _strip_think_blocks)
+        cleaned = re.sub(
+            r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>"
+            r".*?"
+            r"</(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>",
+            "", content, flags=re.DOTALL | re.IGNORECASE,
+        ).strip()
+        if cleaned:
+            return cleaned
+
+    # Content is empty or reasoning-only — try structured reasoning fields
+    reasoning_parts: list[str] = []
+    for field in ("reasoning", "reasoning_content"):
+        val = getattr(msg, field, None)
+        if val and isinstance(val, str) and val.strip() and val not in reasoning_parts:
+            reasoning_parts.append(val.strip())
+
+    details = getattr(msg, "reasoning_details", None)
+    if details and isinstance(details, list):
+        for detail in details:
+            if isinstance(detail, dict):
+                summary = (
+                    detail.get("summary")
+                    or detail.get("content")
+                    or detail.get("text")
+                )
+                if summary and summary not in reasoning_parts:
+                    reasoning_parts.append(summary.strip() if isinstance(summary, str) else str(summary))
+
+    if reasoning_parts:
+        return "\n\n".join(reasoning_parts)
+
+    return ""
+
+
 async def async_call_llm(
     task: str = None,
     *,
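
A quick way to exercise the fallback order is to stub the response shape. This sketch fabricates messages with SimpleNamespace; only extract_content_or_reasoning comes from the diff:

from types import SimpleNamespace

def fake_response(content=None, **fields):
    msg = SimpleNamespace(content=content, reasoning=None,
                          reasoning_content=None, reasoning_details=None)
    for name, value in fields.items():
        setattr(msg, name, value)
    return SimpleNamespace(choices=[SimpleNamespace(message=msg)])

# 1. Normal content: inline think block is stripped.
extract_content_or_reasoning(
    fake_response("<think>hmm</think>The answer is 4."))           # "The answer is 4."

# 2. content=None: structured reasoning field is surfaced instead.
extract_content_or_reasoning(
    fake_response(None, reasoning_content="Chain of thought..."))  # "Chain of thought..."

# 3. Nothing anywhere: empty string, never None.
extract_content_or_reasoning(fake_response())                      # ""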

agent/context_references.py

Lines changed: 13 additions & 6 deletions
@@ -286,12 +286,16 @@ def _expand_git_reference(
     args: list[str],
     label: str,
 ) -> tuple[str | None, str | None]:
-    result = subprocess.run(
-        ["git", *args],
-        cwd=cwd,
-        capture_output=True,
-        text=True,
-    )
+    try:
+        result = subprocess.run(
+            ["git", *args],
+            cwd=cwd,
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+    except subprocess.TimeoutExpired:
+        return f"{ref.raw}: git command timed out (30s)", None
     if result.returncode != 0:
         stderr = (result.stderr or "").strip() or "git command failed"
         return f"{ref.raw}: {stderr}", None
@@ -449,9 +453,12 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
             cwd=cwd,
             capture_output=True,
             text=True,
+            timeout=10,
         )
     except FileNotFoundError:
         return None
+    except subprocess.TimeoutExpired:
+        return None
     if result.returncode != 0:
         return None
     files = [Path(line.strip()) for line in result.stdout.splitlines() if line.strip()]
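
Both hunks apply the same guard: subprocess.run(..., timeout=N) kills the child and raises subprocess.TimeoutExpired once the deadline passes, so a hung git or rg can no longer wedge reference expansion. A standalone illustration of the mechanism (the sleep command stands in for a stuck subprocess):

import subprocess

try:
    subprocess.run(
        ["sleep", "60"],     # stand-in for a git/rg call that hangs
        capture_output=True,
        text=True,
        timeout=1,           # seconds
    )
except subprocess.TimeoutExpired as exc:
    print(f"killed after {exc.timeout}s")  # the child is terminated for us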

agent/display.py

Lines changed: 2 additions & 2 deletions
@@ -699,7 +699,7 @@ def format_context_pressure(
         threshold_percent: Compaction threshold as a fraction of context window.
         compression_enabled: Whether auto-compression is active.
     """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
     filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
     bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
 
@@ -729,7 +729,7 @@ def format_context_pressure_gateway(
     No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
     The percentage shows progress toward the compaction threshold.
     """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
     filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
     bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
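
The bar width was already clamped to _BAR_WIDTH; the percentage was not, so a compaction_progress above 1.0 (context usage past the compaction threshold) rendered as, say, 112%. A worked comparison using the same arithmetic:

compaction_progress = 1.12  # 12% past the threshold

int(compaction_progress * 100)            # 112 (old behavior: gauge overflows)
min(int(compaction_progress * 100), 100)  # 100 (new behavior: pegged at full)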
agent/model_metadata.py

Lines changed: 9 additions & 0 deletions
@@ -113,6 +113,15 @@ def _strip_provider_prefix(model: str) -> str:
     "glm": 202752,
     # Kimi
     "kimi": 262144,
+    # Hugging Face Inference Providers — model IDs use org/name format
+    "Qwen/Qwen3.5-397B-A17B": 131072,
+    "Qwen/Qwen3.5-35B-A3B": 131072,
+    "deepseek-ai/DeepSeek-V3.2": 65536,
+    "moonshotai/Kimi-K2.5": 262144,
+    "moonshotai/Kimi-K2-Thinking": 262144,
+    "MiniMaxAI/MiniMax-M2.5": 204800,
+    "XiaomiMiMo/MiMo-V2-Flash": 32768,
+    "zai-org/GLM-5": 202752,
 }
 
 _CONTEXT_LENGTH_KEYS = (
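
The table now mixes bare family keys ("glm", "kimi") with full org/name IDs. The resolver itself isn't shown in this diff, so the following is a hypothetical illustration of how such a table is commonly consulted (exact match first, substring fallback second):

# Hypothetical lookup; the real resolver is not part of this diff.
def context_length_for(model_id: str, table: dict[str, int]) -> int | None:
    if model_id in table:              # exact org/name match
        return table[model_id]
    lowered = model_id.lower()
    for key, length in table.items():  # family-substring fallback
        if "/" not in key and key in lowered:
            return length
    return None

table = {"kimi": 262144, "moonshotai/Kimi-K2.5": 262144}
context_length_for("moonshotai/Kimi-K2.5", table)  # 262144 (exact)
context_length_for("kimi-latest", table)           # 262144 (matches "kimi")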
