Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions tests/integration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,9 @@ All integration tests inherit from `BaseIntegrationTest` in `base.py`. The base
- **`max_iteration_per_run`** (property) - Maximum iterations per conversation (default: `100`)
- Override to limit LLM calls for faster tests
- Useful for tests that should complete quickly

### Conversation Control

The standard way to define an integration test is to set the `INSTRUCTION` class variable. Its value is sent to the agent as the first user message.

However, if the functionality being tested requires multiple instructions or access to the conversation object mid-test, the test can instead be defined by overriding the `run_instructions` method. This method receives a `LocalConversation` object that can be manipulated directly, for example by sending additional messages or triggering condensations.
19 changes: 12 additions & 7 deletions tests/integration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def conversation_callback(self, event: Event):
self.early_stop_result = result
self.conversation.pause() # Trigger graceful stop

def run_instruction(self) -> TestResult:
def run_integration_test(self) -> TestResult:
"""
Run user instruction through the agent and verify results.

Expand All @@ -149,12 +149,7 @@ def run_instruction(self) -> TestResult:
stderr_buffer = StringIO()

with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
self.conversation.send_message(
message=Message(
role="user", content=[TextContent(text=self.instruction)]
)
)
self.conversation.run()
self.run_instructions(self.conversation)

# Save captured output to log file
captured_output = stdout_buffer.getvalue()
Expand Down Expand Up @@ -194,6 +189,16 @@ def run_instruction(self) -> TestResult:
finally:
self.teardown()

def run_instructions(self, conversation: LocalConversation) -> None:
    """Drive the conversation for this test.

    Sends ``self.instruction_message`` to ``conversation`` and then runs
    the conversation. Subclasses may override this method when a test
    needs to send several messages or otherwise manipulate the
    conversation directly.

    Args:
        conversation: The conversation to send the instruction to and run.
    """
    opening_message = self.instruction_message
    conversation.send_message(message=opening_message)
    conversation.run()

@property
def instruction_message(self) -> Message:
    """Return ``self.instruction`` wrapped as a user-role ``Message``.

    The message content is a single ``TextContent`` item whose text is
    ``self.instruction``.
    """
    instruction_content = [TextContent(text=self.instruction)]
    user_message = Message(role="user", content=instruction_content)
    return user_message

@property
@abstractmethod
def tools(self) -> list[Tool]:
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def process_instance(instance: TestInstance, llm_config: dict[str, Any]) -> Eval

# Run the test
start_time = time.time()
test_result = test_instance.run_instruction()
test_result = test_instance.run_integration_test()
end_time = time.time()

# Access accumulated_cost from the metrics object where it's properly validated
Expand Down
Loading