hud-evals · lorenss-m · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026
diff --git a/docs/docs.json b/docs/docs.json
@@ -29,7 +29,7 @@
   "navigation": {
     "versions": [
       {
-        "version": "0.5.0",
+        "version": "0.5.1",
         "groups": [
           {
             "group": "Get Started",

diff --git a/docs/reference/cli/eval.mdx b/docs/reference/cli/eval.mdx
@@ -6,6 +6,14 @@ icon: "robot"
 
 The `hud eval` command runs an agent on a tasks file or HuggingFace dataset.
 
+<Note>
+**Local Execution Dependencies**: Running Claude or Gemini agents locally requires additional packages:
+```bash
+uv add "hud-python[agents]"
+```
+This is not needed for `--remote` execution, which runs on HUD infrastructure.
+</Note>
+
 ## Usage
 
 ```bash

diff --git a/hud/__init__.py b/hud/__init__.py
@@ -18,7 +18,7 @@
 def trace(*args: object, **kwargs: object) -> EvalContext:
     """Deprecated: Use hud.eval() instead.
 
-    .. deprecated:: 0.5.0
+    .. deprecated:: 0.5.1
         hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
     """
     warnings.warn(

diff --git a/hud/environment/connectors/remote.py b/hud/environment/connectors/remote.py
@@ -61,13 +61,12 @@ def connect_hub(
             self._hub_config = hub_config
 
         # Create mcp_config with standard MCP URL and hub slug in headers
+        # Note: Authorization is injected at request time by httpx/aiohttp hooks
+        # in hud.eval.instrument (uses contextvar for api_key).
         mcp_config = {
             "hud": {
                 "url": settings.hud_mcp_url,
-                "headers": {
-                    "Authorization": f"Bearer {settings.api_key}",
-                    "Environment-Name": slug,
-                },
+                "headers": {"Environment-Name": slug},
             }
         }
 

diff --git a/hud/eval/context.py b/hud/eval/context.py
@@ -89,17 +89,13 @@ class EvalContext(Environment):
 
     Example:
         ```python
-        # From existing environment
-        async with env.eval("task") as ctx:
-            await ctx.call_tool("navigate", url="...")
-            ctx.reward = 0.9
-
-        # Standalone with slug
-        async with hud.eval("my-org/task:1") as ctx:
+        # With task (scenario sets reward automatically)
+        tasks = load_tasks("my-org/task:1")
+        async with hud.eval(tasks) as ctx:
             await agent.run(ctx)
-            ctx.reward = result.reward
+            # reward set by scenario evaluate phase in __aexit__
 
-        # Blank eval
+        # Blank eval (manual reward)
         async with hud.eval() as ctx:
             ctx.reward = compute_reward()
         ```
@@ -229,6 +225,9 @@ def from_environment(
         # Copy connections from parent - each connector is copied so parallel
         # execution gets fresh client instances
         ctx._connections = {name: connector.copy() for name, connector in env._connections.items()}
+
+        # Note: Auth is injected at request time by httpx/aiohttp hooks in hud.eval.instrument
+        # using the contextvar set in __aenter__ (supports api_key passed to hud.eval())
         ctx._setup_calls = env._setup_calls.copy()
         ctx._evaluate_calls = env._evaluate_calls.copy()
 
@@ -536,26 +535,19 @@ async def __aenter__(self) -> Self:
         self._token = _current_trace_headers.set(self.headers)
         self._api_key_token = _current_api_key.set(self._eval_api_key)
 
-        # Connect environment (MCP servers, tools)
-        await super().__aenter__()
+        # Register trace first (environment connection can fail)
+        await self._eval_enter()
 
         try:
+            # Connect environment (MCP servers, tools)
+            await super().__aenter__()
+
             # Run task scenario setup (if created from_task with scenario)
             await self._run_task_scenario_setup()
-
-            # Notify backend and print link
-            await self._eval_enter()
             self._print_eval_link()
-        except BaseException:
+        except BaseException as e:
             # Cleanup if setup fails - __aexit__ won't be called automatically
-            await super().__aexit__(None, None, None)
-            # Reset context vars
-            if self._token is not None:
-                _current_trace_headers.reset(self._token)
-                self._token = None
-            if self._api_key_token is not None:
-                _current_api_key.reset(self._api_key_token)
-                self._api_key_token = None
+            await self.__aexit__(type(e), e, e.__traceback__)
             raise
 
         return self

diff --git a/hud/eval/instrument.py b/hud/eval/instrument.py
@@ -26,6 +26,17 @@ def _get_trace_headers() -> dict[str, str] | None:
     return get_current_trace_headers()
 
 
+def _get_api_key() -> str | None:
+    """Get API key from context or settings.
+
+    Prefers the contextvar (set by hud.eval(api_key=...)),
+    falls back to settings (env var HUD_API_KEY).
+    """
+    from hud.eval.context import get_current_api_key
+
+    return get_current_api_key() or settings.api_key
+
+
 def _is_hud_url(url_str: str) -> bool:
     """Check if URL is a HUD service (inference or MCP)."""
     parsed = urlparse(url_str)
@@ -61,11 +72,14 @@ def _httpx_request_hook(request: Any) -> None:
             request.headers[key] = value
         logger.debug("Added trace headers to request: %s", url_str)
 
-    # Auto-inject API key if not present
-    has_auth = "authorization" in {k.lower() for k in request.headers}
-    if not has_auth and settings.api_key:
-        request.headers["Authorization"] = f"Bearer {settings.api_key}"
-        logger.debug("Added API key auth to request: %s", url_str)
+    # Inject API key if Authorization is missing or a placeholder (contextvar first, then settings)
+    api_key = _get_api_key()
+    if api_key:
+        existing_auth = request.headers.get("Authorization", "")
+        # Override if no auth, empty auth, or invalid "Bearer None"
+        if not existing_auth or existing_auth in ("Bearer None", "Bearer null", "Bearer "):
+            request.headers["Authorization"] = f"Bearer {api_key}"
+            logger.debug("Added API key auth to request: %s", url_str)
 
 
 async def _async_httpx_request_hook(request: Any) -> None:
@@ -138,10 +152,13 @@ async def on_request_start(
                 params.headers[key] = value
             logger.debug("Added trace headers to aiohttp request: %s", url_str)
 
-        has_auth = "authorization" in {k.lower() for k in params.headers}
-        if not has_auth and settings.api_key:
-            params.headers["Authorization"] = f"Bearer {settings.api_key}"
-            logger.debug("Added API key auth to aiohttp request: %s", url_str)
+        api_key = _get_api_key()
+        if api_key:
+            existing_auth = params.headers.get("Authorization", "")
+            # Override if no auth, empty auth, or invalid "Bearer None"
+            if not existing_auth or existing_auth in ("Bearer None", "Bearer null", "Bearer "):
+                params.headers["Authorization"] = f"Bearer {api_key}"
+                logger.debug("Added API key auth to aiohttp request: %s", url_str)
 
     trace_config = aiohttp.TraceConfig()
     trace_config.on_request_start.append(on_request_start)

diff --git a/hud/utils/tests/test_version.py b/hud/utils/tests/test_version.py
@@ -5,4 +5,4 @@ def test_import():
     """Test that the package can be imported."""
     import hud
 
-    assert hud.__version__ == "0.5.0"
+    assert hud.__version__ == "0.5.1"
diff --git a/hud/version.py b/hud/version.py
@@ -4,4 +4,4 @@
 
 from __future__ import annotations
 
-__version__ = "0.5.0"
+__version__ = "0.5.1"
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "hud-python"
-version = "0.5.0"
+version = "0.5.1"
 description = "SDK for the HUD platform."
 readme = "README.md"
 requires-python = ">=3.11, <3.13"
diff --git a/hud/version.py b/hud/version.py
@@ -4,4 +4,4 @@
 
 from __future__ import annotations
 
-__version__ = "0.5.0"
+__version__ = "0.5.1"