Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion benchmarks/gaia/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ def evaluate_instance(
conversation.send_message(msg)
else:
conversation.send_message(instruction)
- conversation.run()
+ run_timeout = int(os.getenv("CONVERSATION_TIMEOUT", "3600"))
+ conversation.run(timeout=run_timeout)

# Extract answer from conversation history
model_answer_raw = self._extract_answer_from_history(conversation.state.events)
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/multiswebench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ def _log_event(ev): # keep it simple
workspace_path=workspace.working_dir,
)
conversation.send_message(instruction)
- conversation.run()
+ run_timeout = int(os.getenv("CONVERSATION_TIMEOUT", "3600"))
+ conversation.run(timeout=run_timeout)

# git add
workspace.execute_command(f"cd {repo_path} ; git add -A")
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/openagentsafety/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,10 +434,11 @@ def event_callback(event) -> None:
conversation.send_message(instruction)

# Run conversation with error handling
+ run_timeout = int(os.getenv("CONVERSATION_TIMEOUT", "3600"))
try:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning)
- conversation.run()
+ conversation.run(timeout=run_timeout)
logger.info(f"Conversation completed for {instance.id}")
except ValidationError as e:
logger.warning(f"Validation error from custom events (continuing): {e}")
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/swebench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,8 @@ def _log_event(ev): # keep it simple
workspace_path=workspace.working_dir,
)
conversation.send_message(instruction)
- conversation.run()
+ run_timeout = int(os.getenv("CONVERSATION_TIMEOUT", "3600"))
+ conversation.run(timeout=run_timeout)

# git add
workspace.execute_command(f"cd {repo_path} ; git add -A")
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/swebenchmultimodal/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,8 @@ def _log_event(ev): # keep it simple
else:
logger.info("No image_assets found, sending text-only instruction")
conversation.send_message(instruction)
- conversation.run()
+ run_timeout = int(os.getenv("CONVERSATION_TIMEOUT", "3600"))
+ conversation.run(timeout=run_timeout)

# git add
workspace.execute_command(f"cd {repo_path} ; git add -A")
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/swtbench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,8 @@ def _log_event(ev): # keep it simple
workspace_path=workspace.working_dir,
)
conversation.send_message(instruction)
- conversation.run()
+ run_timeout = int(os.getenv("CONVERSATION_TIMEOUT", "3600"))
+ conversation.run(timeout=run_timeout)

# git add
workspace.execute_command(f"cd {repo_path} ; git add -A")
Expand Down