Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions hud/agents/openai_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,18 @@ def __init__(self, params: OpenAIChatCreateParams | None = None, **kwargs: Any)
super().__init__(params, **kwargs)
self.config: OpenAIChatConfig

if (
self.config.api_key
and self.config.base_url
and settings.hud_gateway_url in self.config.base_url
and settings.api_key
and self.config.api_key != settings.api_key
):
raise ValueError(
"OpenAIChatAgent api_key is not allowed with HUD Gateway. "
"Use HUD_API_KEY for gateway auth and BYOK headers for provider keys."
)

if self.config.openai_client is not None:
self.oai = self.config.openai_client
elif self.config.api_key is not None or self.config.base_url is not None:
Expand Down
17 changes: 17 additions & 0 deletions hud/cli/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class AgentPreset:
# max_concurrent = 30
# max_steps = 10
# group_size = 1
# byok = false # Remote only; use encrypted env vars on the platform.
# task_ids = ["task_1", "task_2"]
# verbose = true
# very_verbose = true
Expand Down Expand Up @@ -158,6 +159,7 @@ class EvalConfig(BaseModel):
"verbose",
"very_verbose",
"group_size",
"byok",
"remote",
"auto_respond",
"quiet",
Expand All @@ -178,6 +180,7 @@ class EvalConfig(BaseModel):
very_verbose: bool = False
auto_respond: bool | None = None # Continue without prompting (default: True for --full)
group_size: int = 1
byok: bool = False
remote: bool = False
quiet: bool = False # Suppress opening browser for eval links
gateway: bool = False # Use HUD Gateway for LLM API calls
Expand Down Expand Up @@ -211,6 +214,11 @@ def validate_api_keys(self) -> None:
if self.agent_type is None:
return

# BYOK requires remote execution
if self.byok and not self.remote:
hud_console.error("--byok requires --remote (BYOK only works with remote execution)")
raise typer.Exit(1)

if self.remote:
if not settings.api_key:
hud_console.error("HUD_API_KEY is required for remote execution")
Expand Down Expand Up @@ -559,6 +567,8 @@ def display(self) -> None:
table.add_row("remote", "[bold green]True[/bold green] (submitting to platform)")
if self.gateway:
table.add_row("gateway", "[bold green]True[/bold green] (routing via HUD Gateway)")
if self.byok:
table.add_row("byok", "[bold green]True[/bold green] (remote only)")

# Tool filters (only if set)
if self.allowed_tools:
Expand Down Expand Up @@ -676,6 +686,7 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:
agent_params=agent_kwargs,
max_steps=max_steps,
group_size=cfg.group_size,
use_byok=cfg.byok,
)

hud_console.success(f"Tasks submitted. View at: https://hud.ai/jobs/{job_id}")
Expand Down Expand Up @@ -754,6 +765,11 @@ def eval_command(
remote: bool = typer.Option(
False, "--remote", help="Submit tasks to platform for remote execution"
),
byok: bool = typer.Option(
False,
"--byok",
help="Remote only: use BYOK keys from encrypted env vars for inference",
),
quiet: bool = typer.Option(
False, "--quiet", "-q", help="Suppress opening browser for eval links"
),
Expand Down Expand Up @@ -790,6 +806,7 @@ def eval_command(
group_size=group_size,
config=config,
remote=remote,
byok=byok,
quiet=quiet,
gateway=gateway,
)
Expand Down
7 changes: 7 additions & 0 deletions hud/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ class SingleTaskRequest(BaseModel):
description="Additional metadata to inject into the trace context.",
)
trace_id: str | None = Field(default=None, description="Pre-assigned trace ID.")
use_byok: bool = Field(
default=False,
description="If True, use BYOK headers from encrypted env vars for inference.",
)

@model_validator(mode="after")
def _validate_task(self) -> SingleTaskRequest:
Expand Down Expand Up @@ -110,6 +114,7 @@ async def submit_rollouts(
group_size: int = 1,
batch_size: int = 50,
metadata: dict[str, Any] | None = None,
use_byok: bool = False,
) -> None:
"""Submit rollouts to the HUD platform API for remote execution (fire-and-forget).

Expand All @@ -122,6 +127,7 @@ async def submit_rollouts(
group_size: Number of rollouts per task (for variance estimation)
batch_size: Number of rollouts per API batch request
metadata: Additional metadata for each rollout
use_byok: If True, use BYOK keys from encrypted env vars (remote only)
"""
from hud.eval.utils import is_v4_format

Expand Down Expand Up @@ -168,6 +174,7 @@ async def submit_rollouts(
trace_name=trace_name,
group_id=base_task_id if group_size > 1 else None,
metadata=metadata or {},
use_byok=use_byok,
)
)

Expand Down
Loading