From 975627ead4595f5ce04800b0414fe90c9fe4fb71 Mon Sep 17 00:00:00 2001 From: Daniel Zayas Date: Thu, 4 Dec 2025 13:20:08 -0800 Subject: [PATCH] add timeout handling & filtering for RepoLaunch setup --- curation/filter_timeouts.py | 83 +++++++++++++++++++++++++++++++++++ launch/launch/agent/setup.py | 23 +++++++--- launch/launch/agent/verify.py | 6 ++- launch/launch/runtime.py | 2 +- 4 files changed, 105 insertions(+), 9 deletions(-) create mode 100644 curation/filter_timeouts.py diff --git a/curation/filter_timeouts.py b/curation/filter_timeouts.py new file mode 100644 index 0000000..60aae9c --- /dev/null +++ b/curation/filter_timeouts.py @@ -0,0 +1,83 @@ +""" +Filter Stage 2 raw_tasks.jsonl by removing instances that timed out in Stage 3. + +Usage: + python curation/filter_timeouts.py \ + --raw-tasks curation/output/raw_tasks.jsonl \ + --timeouts curation/output/timeouts.txt \ + --output curation/output/raw_tasks.filtered.jsonl + +The timeouts file should contain one instance_id per line. +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Remove timed-out instance IDs from raw_tasks.jsonl" + ) + parser.add_argument( + "--raw-tasks", + type=Path, + default=Path("curation/output/raw_tasks.jsonl"), + help="Path to Stage 2 raw_tasks.jsonl", + ) + parser.add_argument( + "--timeouts", + type=Path, + default=Path("curation/output/timeouts.txt"), + help="File containing one instance_id per line to exclude", + ) + parser.add_argument( + "--output", + type=Path, + default=Path("curation/output/raw_tasks.filtered.jsonl"), + help="Destination JSONL after filtering", + ) + return parser.parse_args() + + +def load_timeouts(path: Path) -> set[str]: + if not path.exists(): + return set() + ids: set[str] = set() + for line in path.read_text().splitlines(): + stripped = line.strip() + if stripped: + ids.add(stripped) + return ids + + +def main() -> None: + args = parse_args() + timeouts = load_timeouts(args.timeouts) + + kept = [] + dropped = 0 + with args.raw_tasks.open() as src: + for line in src: + line = line.rstrip("\n") + if not line: + continue + obj = json.loads(line) + if obj.get("instance_id") in timeouts: + dropped += 1 + continue + kept.append(line) + + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text("\n".join(kept) + ("\n" if kept else "")) + + print( + f"Filtered {dropped} instance(s); wrote {len(kept)} entries to {args.output}" + ) + + +if __name__ == "__main__": + main() + diff --git a/launch/launch/agent/setup.py b/launch/launch/agent/setup.py index 1bf26f4..d6b05c4 100644 --- a/launch/launch/agent/setup.py +++ b/launch/launch/agent/setup.py @@ -12,7 +12,7 @@ from launch.agent.action_parser import ActionParser from launch.agent.prompt import ReAct_prompt from launch.agent.state import AgentState, auto_catch -from launch.runtime import start_session +from launch.runtime import TIMEOUT_EXIT_CODE, start_session from launch.utilities.language_handlers import get_language_handler system_msg = """You are a developer. Your task is to install dependencies and set up a environment that is able to run the tests of the project. @@ -57,6 +57,9 @@ class SetupObservation(BaseModel): content: str = Field("", description="The content of the observation") is_stop: bool = Field(False, description="Whether stop the setup loop") + exit_code: int | None = Field( + None, description="Exit code of the last executed command, if any" + ) class SetupActionParser(ActionParser): @@ -108,7 +111,10 @@ def observation_for_setup_action( if action.action == "command": session = state["session"] result = session.send_command(action.args) - return SetupObservation(content=result.to_observation(), is_stop=False) + exit_code = result.metadata.exit_code if result.metadata else None + return SetupObservation( + content=result.to_observation(), is_stop=False, exit_code=exit_code + ) if action.action == "search": result = state["search_tool"].invoke(action.args) return SetupObservation(content=json.dumps(result), is_stop=False) @@ -202,8 +208,8 @@ def setup(max_steps: int, state: AgentState) -> dict: commands = [] step = 0 while step < max_steps: - if time.time() - state["start_time"] > 30 * 60: - raise TimeoutError("Reached global timeout of 30 minutes") + if time.time() - state["start_time"] > 10 * 60: + raise TimeoutError("Reached global timeout of 10 minutes") step += 1 # uses a window to avoid exceed context commands_history = HumanMessage( @@ -226,17 +232,22 @@ def setup(max_steps: int, state: AgentState) -> dict: # print(response.pretty_repr()) - logger.info("\n" + response.pretty_repr()) + if state.get("debug"): + logger.debug("\n" + response.pretty_repr()) messages.append(response) action = parse_setup_action(response.content) if action and action.action == "command": commands.append(action.args) observation = observation_for_setup_action(state, action) + if observation.exit_code == TIMEOUT_EXIT_CODE: + logger.error("Setup command timed out (exit code %s)", observation.exit_code) + raise TimeoutError("Setup command timed out (exit code 124)") if observation.is_stop: break message = HumanMessage(f"Observation:\n{observation.content}") # print(observation.content) - logger.info("\n" + message.pretty_repr()) + if state.get("debug"): + logger.debug("\n" + message.pretty_repr()) messages.append(message) logger.info("-" * 10 + "End setup conversation" + "-" * 10) diff --git a/launch/launch/agent/verify.py b/launch/launch/agent/verify.py index d5227f9..d532c6c 100644 --- a/launch/launch/agent/verify.py +++ b/launch/launch/agent/verify.py @@ -168,7 +168,8 @@ def verify(max_steps: int, state: AgentState) -> dict: ) response = llm.invoke(input_messages) # print(response.pretty_repr()) - logger.info(response.pretty_repr()) + if state.get("debug"): + logger.debug(response.pretty_repr()) messages.append(response) action = parse_verify_action(response.content) if action.action == "command": @@ -176,7 +177,8 @@ def verify(max_steps: int, state: AgentState) -> dict: observation = observation_for_verify_action(action, session) message = HumanMessage(f"Observation:\n{observation.content}") # print(message.pretty_repr()) - logger.info(message.pretty_repr()) + if state.get("debug"): + logger.debug(message.pretty_repr()) messages.append(message) if action.action == "issue": if observation.content == "": diff --git a/launch/launch/runtime.py b/launch/launch/runtime.py index 1db717b..a203e0a 100644 --- a/launch/launch/runtime.py +++ b/launch/launch/runtime.py @@ -251,7 +251,7 @@ def _combine_outputs_between_matches( output_segments.append(output_segment) return "\n".join(output_segments) + "\n" if output_segments else "" - def send_command(self, command: str, timeout: float = 20 * 60) -> CommandResult: + def send_command(self, command: str, timeout: float = 5 * 60) -> CommandResult: if not command.endswith("\n"): command += "\n"