Skip to content

Commit 16d5685

Browse files
authored
Merge pull request rllm-org#109 from alex-remedios-aisi/main
Add k8s support for gaia
2 parents 9707791 + 473a792 commit 16d5685

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

src/inspect_evals/gaia/gaia.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,18 @@
55
from inspect_ai import Task, task
66
from inspect_ai.solver import Solver, basic_agent, system_message
77
from inspect_ai.tool import bash, python, web_browser
8+
from inspect_ai.util._sandbox.environment import SandboxEnvironmentType
89

910
from .dataset import gaia_dataset
1011
from .scorer import gaia_scorer
1112

13+
TASK_DIR = Path(__file__).parent
14+
COMPOSE_FILE = TASK_DIR / "compose.yaml"
15+
VALUES_FILE = TASK_DIR / "values.yaml"
16+
17+
DEFAULT_DOCKER_SANDBOX = ("docker", COMPOSE_FILE.as_posix())
18+
DEFAULT_K8S_SANDBOX = ("k8s", VALUES_FILE.as_posix())
19+
1220

1321
@task
1422
def gaia(
@@ -21,6 +29,7 @@ def gaia(
2129
] = "2023_all",
2230
split: Literal["test", "validation"] = "validation",
2331
instance_ids: str | list[str] | None = None,
32+
sandbox: SandboxEnvironmentType = DEFAULT_DOCKER_SANDBOX,
2433
) -> Task:
2534
"""GAIA task.
2635
@@ -34,6 +43,7 @@ def gaia(
3443
subset: Which GAIA subset to evaluate (defaults to 2023_all).
3544
split: Which split to evaluate ("validation" or "test").
3645
instance_ids: Specific question instances to evaluate.
46+
sandbox: Sandbox environment to use for the task.
3747
3848
Returns:
3949
GAIA Inspect task.
@@ -56,15 +66,12 @@ def gaia(
5666
# resolve scorer (test split has no answers)
5767
scorer = gaia_scorer() if split == "validation" else None
5868

59-
# docker compose file is alongside the src file
60-
COMPOSE_FILE = Path(__file__).parent / "compose.yaml"
61-
6269
# return task
6370
return Task(
6471
dataset=dataset,
6572
plan=solver,
6673
scorer=scorer,
67-
sandbox=("docker", COMPOSE_FILE.as_posix()),
74+
sandbox=sandbox,
6875
)
6976

7077

src/inspect_evals/gaia/values.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
services:
2+
default:
3+
image: aisiuk/inspect-web-browser-tool
4+
command:
5+
- python3
6+
args:
7+
- /app/web_browser/web_server.py
8+
allowDomains:
9+
- "*"

0 commit comments

Comments
 (0)