diff --git a/tests/integration/README.md b/tests/integration/README.md index b46722aaa5..322c7475ec 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -106,3 +106,9 @@ All integration tests inherit from `BaseIntegrationTest` in `base.py`. The base - **`max_iteration_per_run`** (property) - Maximum iterations per conversation (default: `100`) - Override to limit LLM calls for faster tests - Useful for tests that should complete quickly + +### Conversation Control + +The standard way to define an integration test is to set the `INSTRUCTION` class variable. This instruction is sent to the agent as the first user message. + +However, if the functionality being tested requires multiple instructions or access to the conversation object mid-test, then the test can instead be defined by overriding the `run_instructions` method. This method receives a `LocalConversation` object that can be manipulated directly by sending messages, triggering condensations, and the like. \ No newline at end of file diff --git a/tests/integration/base.py b/tests/integration/base.py index c633d367af..6f7a759aaf 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -128,7 +128,7 @@ def conversation_callback(self, event: Event): self.early_stop_result = result self.conversation.pause() # Trigger graceful stop - def run_instruction(self) -> TestResult: + def run_integration_test(self) -> TestResult: """ Run user instruction through the agent and verify results. 
@@ -149,12 +149,7 @@ def run_instruction(self) -> TestResult: stderr_buffer = StringIO() with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): - self.conversation.send_message( - message=Message( - role="user", content=[TextContent(text=self.instruction)] - ) - ) - self.conversation.run() + self.run_instructions(self.conversation) # Save captured output to log file captured_output = stdout_buffer.getvalue() @@ -194,6 +189,16 @@ def run_instruction(self) -> TestResult: finally: self.teardown() + def run_instructions(self, conversation: LocalConversation) -> None: + """Feed user instructions to the agent and manage the conversation.""" + conversation.send_message(message=self.instruction_message) + conversation.run() + + @property + def instruction_message(self) -> Message: + """The initial instruction message for the agent.""" + return Message(role="user", content=[TextContent(text=self.instruction)]) + @property @abstractmethod def tools(self) -> list[Tool]: diff --git a/tests/integration/run_infer.py b/tests/integration/run_infer.py index 8deb07568c..3000e568ce 100755 --- a/tests/integration/run_infer.py +++ b/tests/integration/run_infer.py @@ -148,7 +148,7 @@ def process_instance(instance: TestInstance, llm_config: dict[str, Any]) -> Eval # Run the test start_time = time.time() - test_result = test_instance.run_instruction() + test_result = test_instance.run_integration_test() end_time = time.time() # Access accumulated_cost from the metrics object where it's properly validated