OpenHands · csmith49 · Jan 7, 2026 · Jan 5, 2026 · Jan 5, 2026 · Jan 6, 2026
diff --git a/tests/integration/README.md b/tests/integration/README.md
@@ -106,3 +106,9 @@ All integration tests inherit from `BaseIntegrationTest` in `base.py`. The base
 - **`max_iteration_per_run`** (property) - Maximum iterations per conversation (default: `100`)
   - Override to limit LLM calls for faster tests
   - Useful for tests that should complete quickly
+
+### Conversation Control
+
+The standard way to define an integration test is to set the `INSTRUCTION` class variable. This instruction is sent to the agent as the first user message.
+
+However, if the functionality being tested requires multiple instructions or access to the conversation object mid-test, the test can instead be defined by overriding the `run_instructions` method. This method receives a `LocalConversation` object that can be manipulated directly by sending messages, triggering condensations, and the like.
diff --git a/tests/integration/base.py b/tests/integration/base.py
@@ -128,7 +128,7 @@ def conversation_callback(self, event: Event):
                 self.early_stop_result = result
                 self.conversation.pause()  # Trigger graceful stop
 
-    def run_instruction(self) -> TestResult:
+    def run_integration_test(self) -> TestResult:
         """
         Run user instruction through the agent and verify results.
 
@@ -149,12 +149,7 @@ def run_instruction(self) -> TestResult:
             stderr_buffer = StringIO()
 
             with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
-                self.conversation.send_message(
-                    message=Message(
-                        role="user", content=[TextContent(text=self.instruction)]
-                    )
-                )
-                self.conversation.run()
+                self.run_instructions(self.conversation)
 
             # Save captured output to log file
             captured_output = stdout_buffer.getvalue()
@@ -194,6 +189,16 @@ def run_instruction(self) -> TestResult:
         finally:
             self.teardown()
 
+    def run_instructions(self, conversation: LocalConversation) -> None:
+        """Send the instruction message and run the conversation; override to extend."""
+        conversation.send_message(message=self.instruction_message)
+        conversation.run()
+
+    @property
+    def instruction_message(self) -> Message:
+        """User-role message whose only content is the text of `self.instruction`."""
+        return Message(role="user", content=[TextContent(text=self.instruction)])
+
     @property
     @abstractmethod
     def tools(self) -> list[Tool]:

diff --git a/tests/integration/run_infer.py b/tests/integration/run_infer.py
@@ -148,7 +148,7 @@ def process_instance(instance: TestInstance, llm_config: dict[str, Any]) -> Eval
 
         # Run the test
         start_time = time.time()
-        test_result = test_instance.run_instruction()
+        test_result = test_instance.run_integration_test()
         end_time = time.time()
 
         # Access accumulated_cost from the metrics object where it's properly validated