Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"navigation": {
"versions": [
{
"version": "0.5.0",
"version": "0.5.1",
"groups": [
{
"group": "Get Started",
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/cli/eval.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ icon: "robot"

The `hud eval` command runs an agent on a tasks file or HuggingFace dataset.

<Note>
**Local Execution Dependencies**: Running Claude or Gemini agents locally requires additional packages:
```bash
uv add "hud-python[agents]"
```
This is not needed for `--remote` execution, which runs on HUD infrastructure.
</Note>

## Usage

```bash
Expand Down
2 changes: 1 addition & 1 deletion hud/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
def trace(*args: object, **kwargs: object) -> EvalContext:
"""Deprecated: Use hud.eval() instead.

.. deprecated:: 0.5.0
.. deprecated:: 0.5.1
hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
"""
warnings.warn(
Expand Down
7 changes: 3 additions & 4 deletions hud/environment/connectors/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,12 @@ def connect_hub(
self._hub_config = hub_config

# Create mcp_config with standard MCP URL and hub slug in headers
# Note: Authorization is injected at request time by httpx/aiohttp hooks
# in hud.eval.instrument (uses contextvar for api_key).
mcp_config = {
"hud": {
"url": settings.hud_mcp_url,
"headers": {
"Authorization": f"Bearer {settings.api_key}",
"Environment-Name": slug,
},
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hub auth fails when Environment imported directly

The `Authorization` header was removed from `connect_hub()`, which now relies on the httpx/aiohttp hooks in `hud.eval.instrument` to inject auth at request time. However, that module is only auto-imported by `import hud`. Users who follow the pattern documented in `hud/environment/__init__.py` (`from hud.environment import Environment`) won't trigger the instrumentation, so hub connections fail with authentication errors. The old code put the header directly in `mcp_config`, so auth worked regardless of import path.

Fix in Cursor Fix in Web

"headers": {"Environment-Name": slug},
}
}

Expand Down
38 changes: 15 additions & 23 deletions hud/eval/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,13 @@ class EvalContext(Environment):

Example:
```python
# From existing environment
async with env.eval("task") as ctx:
await ctx.call_tool("navigate", url="...")
ctx.reward = 0.9

# Standalone with slug
async with hud.eval("my-org/task:1") as ctx:
# With task (scenario sets reward automatically)
tasks = load_tasks("my-org/task:1")
async with hud.eval(tasks) as ctx:
await agent.run(ctx)
ctx.reward = result.reward
# reward set by scenario evaluate phase in __aexit__

# Blank eval
# Blank eval (manual reward)
async with hud.eval() as ctx:
ctx.reward = compute_reward()
```
Expand Down Expand Up @@ -229,6 +225,9 @@ def from_environment(
# Copy connections from parent - each connector is copied so parallel
# execution gets fresh client instances
ctx._connections = {name: connector.copy() for name, connector in env._connections.items()}

# Note: Auth is injected at request time by httpx/aiohttp hooks in hud.eval.instrument
# using the contextvar set in __aenter__ (supports api_key passed to hud.eval())
ctx._setup_calls = env._setup_calls.copy()
ctx._evaluate_calls = env._evaluate_calls.copy()

Expand Down Expand Up @@ -536,26 +535,19 @@ async def __aenter__(self) -> Self:
self._token = _current_trace_headers.set(self.headers)
self._api_key_token = _current_api_key.set(self._eval_api_key)

# Connect environment (MCP servers, tools)
await super().__aenter__()
# Register trace first (environment connection can fail)
await self._eval_enter()

try:
# Connect environment (MCP servers, tools)
await super().__aenter__()

# Run task scenario setup (if created from_task with scenario)
await self._run_task_scenario_setup()

# Notify backend and print link
await self._eval_enter()
self._print_eval_link()
except BaseException:
except BaseException as e:
# Cleanup if setup fails - __aexit__ won't be called automatically
await super().__aexit__(None, None, None)
# Reset context vars
if self._token is not None:
_current_trace_headers.reset(self._token)
self._token = None
if self._api_key_token is not None:
_current_api_key.reset(self._api_key_token)
self._api_key_token = None
await self.__aexit__(type(e), e, e.__traceback__)
raise

return self
Expand Down
35 changes: 26 additions & 9 deletions hud/eval/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ def _get_trace_headers() -> dict[str, str] | None:
return get_current_trace_headers()


def _get_api_key() -> str | None:
    """Resolve the API key used when auto-injecting auth headers.

    The context-scoped key (set by ``hud.eval(api_key=...)``) takes
    priority. When it is missing or empty, the value from settings
    (env var ``HUD_API_KEY``) is returned instead, whatever it holds.
    """
    # Imported lazily inside the function, as in the original block.
    from hud.eval.context import get_current_api_key

    context_key = get_current_api_key()
    if context_key:
        return context_key
    return settings.api_key


def _is_hud_url(url_str: str) -> bool:
"""Check if URL is a HUD service (inference or MCP)."""
parsed = urlparse(url_str)
Expand Down Expand Up @@ -61,11 +72,14 @@ def _httpx_request_hook(request: Any) -> None:
request.headers[key] = value
logger.debug("Added trace headers to request: %s", url_str)

# Auto-inject API key if not present
has_auth = "authorization" in {k.lower() for k in request.headers}
if not has_auth and settings.api_key:
request.headers["Authorization"] = f"Bearer {settings.api_key}"
logger.debug("Added API key auth to request: %s", url_str)
# Auto-inject API key if not present or invalid (prefer contextvar, fallback to settings)
api_key = _get_api_key()
if api_key:
existing_auth = request.headers.get("Authorization", "")
# Override if auth is missing, empty, or a placeholder ("Bearer None", "Bearer null", bare "Bearer ")
if not existing_auth or existing_auth in ("Bearer None", "Bearer null", "Bearer "):
request.headers["Authorization"] = f"Bearer {api_key}"
logger.debug("Added API key auth to request: %s", url_str)


async def _async_httpx_request_hook(request: Any) -> None:
Expand Down Expand Up @@ -138,10 +152,13 @@ async def on_request_start(
params.headers[key] = value
logger.debug("Added trace headers to aiohttp request: %s", url_str)

has_auth = "authorization" in {k.lower() for k in params.headers}
if not has_auth and settings.api_key:
params.headers["Authorization"] = f"Bearer {settings.api_key}"
logger.debug("Added API key auth to aiohttp request: %s", url_str)
api_key = _get_api_key()
if api_key:
existing_auth = params.headers.get("Authorization", "")
# Override if auth is missing, empty, or a placeholder ("Bearer None", "Bearer null", bare "Bearer ")
if not existing_auth or existing_auth in ("Bearer None", "Bearer null", "Bearer "):
params.headers["Authorization"] = f"Bearer {api_key}"
logger.debug("Added API key auth to aiohttp request: %s", url_str)

trace_config = aiohttp.TraceConfig()
trace_config.on_request_start.append(on_request_start)
Expand Down
2 changes: 1 addition & 1 deletion hud/utils/tests/test_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ def test_import():
"""Test that the package can be imported."""
import hud

assert hud.__version__ == "0.5.0"
assert hud.__version__ == "0.5.1"
2 changes: 1 addition & 1 deletion hud/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

from __future__ import annotations

__version__ = "0.5.0"
__version__ = "0.5.1"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "hud-python"
version = "0.5.0"
version = "0.5.1"
description = "SDK for the HUD platform."
readme = "README.md"
requires-python = ">=3.11, <3.13"
Expand Down
Loading