55from inspect_ai import Task , task
66from inspect_ai .solver import Solver , basic_agent , system_message
77from inspect_ai .tool import bash , python , web_browser
8+ from inspect_ai .util ._sandbox .environment import SandboxEnvironmentType
89
910from .dataset import gaia_dataset
1011from .scorer import gaia_scorer
1112
# Directory that contains this module; the sandbox config files below are
# resolved relative to it.
13+ TASK_DIR = Path (__file__ ).parent
# Config file paired with the "docker" sandbox type below.
14+ COMPOSE_FILE = TASK_DIR / "compose.yaml"
# Config file paired with the "k8s" sandbox type below.
15+ VALUES_FILE = TASK_DIR / "values.yaml"
16+
# (sandbox type, config file path) tuples; paths are rendered with forward
# slashes via as_posix(). DEFAULT_DOCKER_SANDBOX is the default value of the
# `sandbox` parameter of gaia().
17+ DEFAULT_DOCKER_SANDBOX = ("docker" , COMPOSE_FILE .as_posix ())
# Kubernetes counterpart; not referenced elsewhere in the visible code, so it
# is presumably meant for callers to pass as `sandbox` -- confirm.
18+ DEFAULT_K8S_SANDBOX = ("k8s" , VALUES_FILE .as_posix ())
19+
1220
1321@task
1422def gaia (
@@ -21,6 +29,7 @@ def gaia(
2129 ] = "2023_all" ,
2230 split : Literal ["test" , "validation" ] = "validation" ,
2331 instance_ids : str | list [str ] | None = None ,
32+ sandbox : SandboxEnvironmentType = DEFAULT_DOCKER_SANDBOX ,
2433) -> Task :
2534 """GAIA task.
2635
@@ -34,6 +43,7 @@ def gaia(
3443 subset: Which GAIA subset to evaluate (defaults to 2023_all).
3544 split: Which split to evaluate ("validation" or "test").
3645 instance_ids: Specific question instances to evaluate.
46+ sandbox: Sandbox environment to use for the task.
3747
3848 Returns:
3949 GAIA Inspect task.
@@ -56,15 +66,12 @@ def gaia(
5666 # resolve scorer (test split has no answers)
5767 scorer = gaia_scorer () if split == "validation" else None
5868
59- # docker compose file is alongside the src file
60- COMPOSE_FILE = Path (__file__ ).parent / "compose.yaml"
61-
6269 # return task
6370 return Task (
6471 dataset = dataset ,
6572 plan = solver ,
6673 scorer = scorer ,
67- sandbox = ( "docker" , COMPOSE_FILE . as_posix ()) ,
74+ sandbox = sandbox ,
6875 )
6976
7077
0 commit comments