Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions hud/agents/openai_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,18 @@ def __init__(self, params: OpenAIChatCreateParams | None = None, **kwargs: Any)
super().__init__(params, **kwargs)
self.config: OpenAIChatConfig

if (
self.config.api_key
and self.config.base_url
and settings.hud_gateway_url in self.config.base_url
and settings.api_key
and self.config.api_key != settings.api_key
):
raise ValueError(
"OpenAIChatAgent api_key is not allowed with HUD Gateway. "
"Use HUD_API_KEY for gateway auth and BYOK headers for provider keys."
)

if self.config.openai_client is not None:
self.oai = self.config.openai_client
elif self.config.api_key is not None or self.config.base_url is not None:
Expand Down
31 changes: 24 additions & 7 deletions hud/cli/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class AgentPreset:
# max_concurrent = 30
# max_steps = 10
# group_size = 1
# byok = false # Remote only; use encrypted env vars on the platform.
# task_ids = ["task_1", "task_2"]
# verbose = true
# very_verbose = true
Expand Down Expand Up @@ -158,6 +159,7 @@ class EvalConfig(BaseModel):
"verbose",
"very_verbose",
"group_size",
"byok",
"remote",
"auto_respond",
"quiet",
Expand All @@ -178,6 +180,7 @@ class EvalConfig(BaseModel):
very_verbose: bool = False
auto_respond: bool | None = None # Continue without prompting (default: True for --full)
group_size: int = 1
byok: bool = False
remote: bool = False
quiet: bool = False # Suppress opening browser for eval links
gateway: bool = False # Use HUD Gateway for LLM API calls
Expand Down Expand Up @@ -208,6 +211,11 @@ def _parse_agent_type(cls, v: Any) -> AgentType | None:

def validate_api_keys(self) -> None:
"""Validate required API keys for the selected agent. Raises typer.Exit on failure."""
# BYOK requires remote execution (check before agent_type guard)
if self.byok and not self.remote:
hud_console.error("--byok requires --remote (BYOK only works with remote execution)")
raise typer.Exit(1)

if self.agent_type is None:
return

Expand Down Expand Up @@ -284,14 +292,11 @@ def get_agent_kwargs(self) -> dict[str, Any]:
if self.model:
kwargs["model"] = self.model

if self.agent_type == AgentType.OPENAI_COMPATIBLE:
# For gateway base_url, inject HUD API key if not already set
if self.agent_type == AgentType.OPENAI_COMPATIBLE and "api_key" not in kwargs:
base_url = kwargs.get("base_url", "")
if "api_key" not in kwargs:
# Use HUD API key for gateway, otherwise fall back to OpenAI API key
if settings.hud_gateway_url in base_url:
kwargs["api_key"] = settings.api_key
elif settings.openai_api_key:
kwargs["api_key"] = settings.openai_api_key
if settings.hud_gateway_url in base_url and settings.api_key:
kwargs["api_key"] = settings.api_key

# Auto-detect Bedrock when Claude is selected with a Bedrock ARN
# Check both model and checkpoint_name for ARN patterns
Expand Down Expand Up @@ -559,6 +564,8 @@ def display(self) -> None:
table.add_row("remote", "[bold green]True[/bold green] (submitting to platform)")
if self.gateway:
table.add_row("gateway", "[bold green]True[/bold green] (routing via HUD Gateway)")
if self.byok:
table.add_row("byok", "[bold green]True[/bold green] (remote only)")

# Tool filters (only if set)
if self.allowed_tools:
Expand Down Expand Up @@ -659,6 +666,9 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:

# Remote execution - submit to HUD platform
if cfg.remote:
agent_kwargs = {
k: v for k, v in agent_kwargs.items() if k not in ("api_key", "model_client")
}
# Create a job ID for tracking
import uuid

Expand All @@ -676,6 +686,7 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:
agent_params=agent_kwargs,
max_steps=max_steps,
group_size=cfg.group_size,
use_byok=cfg.byok,
)

hud_console.success(f"Tasks submitted. View at: https://hud.ai/jobs/{job_id}")
Expand Down Expand Up @@ -754,6 +765,11 @@ def eval_command(
remote: bool = typer.Option(
False, "--remote", help="Submit tasks to platform for remote execution"
),
byok: bool = typer.Option(
False,
"--byok",
help="Remote only: use BYOK keys from encrypted env vars for inference",
),
quiet: bool = typer.Option(
False, "--quiet", "-q", help="Suppress opening browser for eval links"
),
Expand Down Expand Up @@ -790,6 +806,7 @@ def eval_command(
group_size=group_size,
config=config,
remote=remote,
byok=byok,
quiet=quiet,
gateway=gateway,
)
Expand Down
7 changes: 7 additions & 0 deletions hud/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ class SingleTaskRequest(BaseModel):
description="Additional metadata to inject into the trace context.",
)
trace_id: str | None = Field(default=None, description="Pre-assigned trace ID.")
use_byok: bool = Field(
default=False,
description="If True, use BYOK headers from encrypted env vars for inference.",
)

@model_validator(mode="after")
def _validate_task(self) -> SingleTaskRequest:
Expand Down Expand Up @@ -110,6 +114,7 @@ async def submit_rollouts(
group_size: int = 1,
batch_size: int = 50,
metadata: dict[str, Any] | None = None,
use_byok: bool = False,
) -> None:
"""Submit rollouts to the HUD platform API for remote execution (fire-and-forget).

Expand All @@ -122,6 +127,7 @@ async def submit_rollouts(
group_size: Number of rollouts per task (for variance estimation)
batch_size: Number of rollouts per API batch request
metadata: Additional metadata for each rollout
use_byok: If True, use BYOK keys from encrypted env vars (remote only)
"""
from hud.eval.utils import is_v4_format

Expand Down Expand Up @@ -168,6 +174,7 @@ async def submit_rollouts(
trace_name=trace_name,
group_id=base_task_id if group_size > 1 else None,
metadata=metadata or {},
use_byok=use_byok,
)
)

Expand Down
Loading