diff --git a/docs/docs.json b/docs/docs.json index 2b73c81c..ee4dcf46 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -29,7 +29,7 @@ "navigation": { "versions": [ { - "version": "0.5.0", + "version": "0.5.1", "groups": [ { "group": "Get Started", diff --git a/docs/reference/cli/eval.mdx b/docs/reference/cli/eval.mdx index e8be3c21..443df615 100644 --- a/docs/reference/cli/eval.mdx +++ b/docs/reference/cli/eval.mdx @@ -6,6 +6,14 @@ icon: "robot" The `hud eval` command runs an agent on a tasks file or HuggingFace dataset. + +**Local Execution Dependencies**: Running Claude or Gemini agents locally requires additional packages: +```bash +uv add "hud-python[agents]" +``` +This is not needed for `--remote` execution, which runs on HUD infrastructure. + + ## Usage ```bash diff --git a/hud/__init__.py b/hud/__init__.py index cf88add5..24333f1e 100644 --- a/hud/__init__.py +++ b/hud/__init__.py @@ -18,7 +18,7 @@ def trace(*args: object, **kwargs: object) -> EvalContext: """Deprecated: Use hud.eval() instead. - .. deprecated:: 0.5.0 + .. deprecated:: 0.5.1 hud.trace() is deprecated. Use hud.eval() or env.eval() instead. """ warnings.warn( diff --git a/hud/environment/connectors/remote.py b/hud/environment/connectors/remote.py index b5cdda0b..866af24b 100644 --- a/hud/environment/connectors/remote.py +++ b/hud/environment/connectors/remote.py @@ -61,13 +61,12 @@ def connect_hub( self._hub_config = hub_config # Create mcp_config with standard MCP URL and hub slug in headers + # Note: Authorization is injected at request time by httpx/aiohttp hooks + # in hud.eval.instrument (uses contextvar for api_key). mcp_config = { "hud": { "url": settings.hud_mcp_url, - "headers": { - "Authorization": f"Bearer {settings.api_key}", - "Environment-Name": slug, - }, + "headers": {"Environment-Name": slug}, } } diff --git a/hud/eval/context.py b/hud/eval/context.py index 77bb752d..ca0704f5 100644 --- a/hud/eval/context.py +++ b/hud/eval/context.py @@ -89,17 +89,13 @@ class EvalContext(Environment): Example: ```python - # From existing environment - async with env.eval("task") as ctx: - await ctx.call_tool("navigate", url="...") - ctx.reward = 0.9 - - # Standalone with slug - async with hud.eval("my-org/task:1") as ctx: + # With task (scenario sets reward automatically) + tasks = load_tasks("my-org/task:1") + async with hud.eval(tasks) as ctx: await agent.run(ctx) - ctx.reward = result.reward + # reward set by scenario evaluate phase in __aexit__ - # Blank eval + # Blank eval (manual reward) async with hud.eval() as ctx: ctx.reward = compute_reward() ``` @@ -229,6 +225,9 @@ def from_environment( # Copy connections from parent - each connector is copied so parallel # execution gets fresh client instances ctx._connections = {name: connector.copy() for name, connector in env._connections.items()} + + # Note: Auth is injected at request time by httpx/aiohttp hooks in hud.eval.instrument + # using the contextvar set in __aenter__ (supports api_key passed to hud.eval()) ctx._setup_calls = env._setup_calls.copy() ctx._evaluate_calls = env._evaluate_calls.copy() @@ -536,26 +535,19 @@ async def __aenter__(self) -> Self: self._token = _current_trace_headers.set(self.headers) self._api_key_token = _current_api_key.set(self._eval_api_key) - # Connect environment (MCP servers, tools) - await super().__aenter__() + # Register trace first (environment connection can fail) + await self._eval_enter() try: + # Connect environment (MCP servers, tools) + await super().__aenter__() + # Run task scenario setup (if created from_task with scenario) await self._run_task_scenario_setup() - - # Notify backend and print link - await self._eval_enter() self._print_eval_link() - except BaseException: + except BaseException as e: # Cleanup if setup fails - __aexit__ won't be called automatically - await super().__aexit__(None, None, None) - # Reset context vars - if self._token is not None: - _current_trace_headers.reset(self._token) - self._token = None - if self._api_key_token is not None: - _current_api_key.reset(self._api_key_token) - self._api_key_token = None + await self.__aexit__(type(e), e, e.__traceback__) raise return self diff --git a/hud/eval/instrument.py b/hud/eval/instrument.py index 94598f1f..c2767f78 100644 --- a/hud/eval/instrument.py +++ b/hud/eval/instrument.py @@ -26,6 +26,17 @@ def _get_trace_headers() -> dict[str, str] | None: return get_current_trace_headers() +def _get_api_key() -> str | None: + """Get API key from context or settings. + + Prefers the contextvar (set by hud.eval(api_key=...)), + falls back to settings (env var HUD_API_KEY). + """ + from hud.eval.context import get_current_api_key + + return get_current_api_key() or settings.api_key + + def _is_hud_url(url_str: str) -> bool: """Check if URL is a HUD service (inference or MCP).""" parsed = urlparse(url_str) @@ -61,11 +72,14 @@ def _httpx_request_hook(request: Any) -> None: request.headers[key] = value logger.debug("Added trace headers to request: %s", url_str) - # Auto-inject API key if not present - has_auth = "authorization" in {k.lower() for k in request.headers} - if not has_auth and settings.api_key: - request.headers["Authorization"] = f"Bearer {settings.api_key}" - logger.debug("Added API key auth to request: %s", url_str) + # Auto-inject API key if not present or invalid (prefer contextvar, fallback to settings) + api_key = _get_api_key() + if api_key: + existing_auth = request.headers.get("Authorization", "") + # Override if no auth, empty auth, or invalid "Bearer None" + if not existing_auth or existing_auth in ("Bearer None", "Bearer null", "Bearer "): + request.headers["Authorization"] = f"Bearer {api_key}" + logger.debug("Added API key auth to request: %s", url_str) async def _async_httpx_request_hook(request: Any) -> None: @@ -138,10 +152,13 @@ async def on_request_start( params.headers[key] = value logger.debug("Added trace headers to aiohttp request: %s", url_str) - has_auth = "authorization" in {k.lower() for k in params.headers} - if not has_auth and settings.api_key: - params.headers["Authorization"] = f"Bearer {settings.api_key}" - logger.debug("Added API key auth to aiohttp request: %s", url_str) + api_key = _get_api_key() + if api_key: + existing_auth = params.headers.get("Authorization", "") + # Override if no auth, empty auth, or invalid "Bearer None" + if not existing_auth or existing_auth in ("Bearer None", "Bearer null", "Bearer "): + params.headers["Authorization"] = f"Bearer {api_key}" + logger.debug("Added API key auth to aiohttp request: %s", url_str) trace_config = aiohttp.TraceConfig() trace_config.on_request_start.append(on_request_start) diff --git a/hud/utils/tests/test_version.py b/hud/utils/tests/test_version.py index 13da2b66..3ad007d1 100644 --- a/hud/utils/tests/test_version.py +++ b/hud/utils/tests/test_version.py @@ -5,4 +5,4 @@ def test_import(): """Test that the package can be imported.""" import hud - assert hud.__version__ == "0.5.0" + assert hud.__version__ == "0.5.1" diff --git a/hud/version.py b/hud/version.py index b16da8fa..be84dad3 100644 --- a/hud/version.py +++ b/hud/version.py @@ -4,4 +4,4 @@ from __future__ import annotations -__version__ = "0.5.0" +__version__ = "0.5.1" diff --git a/pyproject.toml b/pyproject.toml index e5f4165d..480ebb94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "hud-python" -version = "0.5.0" +version = "0.5.1" description = "SDK for the HUD platform." readme = "README.md" requires-python = ">=3.11, <3.13"