diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a125d720f8..5031d7106a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -57,8 +57,10 @@ jobs: run: | # Clean up any existing coverage file rm -f .coverage + # Use pytest-xdist (-n auto) for parallel execution with proper + # coverage collection. With --forked, coverage from child processes is lost. CI=true uv run python -m pytest -vvs \ - --forked \ + -n auto \ --cov=openhands-sdk \ --cov-report=term-missing \ --cov-fail-under=0 \ @@ -112,6 +114,8 @@ jobs: run: | # Clean up any existing coverage file rm -f .coverage + # Use --forked for tools tests due to terminal test conflicts + # when running in parallel (shared /tmp paths, subprocess management) CI=true uv run python -m pytest -vvs \ --forked \ --cov=openhands-tools \ @@ -166,8 +170,10 @@ jobs: run: | # Clean up any existing coverage file rm -f .coverage + # Use pytest-xdist (-n auto) for parallel execution with proper + # coverage collection. With --forked, coverage from child processes is lost. CI=true uv run python -m pytest -vvs \ - --forked \ + -n auto \ --cov=openhands-agent-server \ --cov-report=term-missing \ --cov-fail-under=0 \ diff --git a/tests/sdk/agent/test_fix_malformed_tool_arguments.py b/tests/sdk/agent/test_fix_malformed_tool_arguments.py index 6a2ef28e94..9d5dda5e66 100644 --- a/tests/sdk/agent/test_fix_malformed_tool_arguments.py +++ b/tests/sdk/agent/test_fix_malformed_tool_arguments.py @@ -44,6 +44,19 @@ class JsonDecodingOptionalAction(Action): config: dict[str, int] | None = Field(default=None, description="Optional dict") +class _NestedActionForMalformedArgs(Action): + """Action with nested structures for testing JSON decoding. + + This class is defined at module level (rather than inside a test function) to + ensure it's importable by Pydantic during serialization/deserialization. + Defining it inside a test function causes test pollution when running tests + in parallel with pytest-xdist. 
+ """ + + nested_list: list[list[int]] = Field(description="Nested list") + nested_dict: dict[str, dict[str, str]] = Field(description="Nested dict") + + def test_decode_json_string_list(): """Test that JSON string lists are decoded to native lists.""" data = { @@ -201,17 +214,12 @@ def test_json_string_with_wrong_type_rejected(): def test_nested_structures(): """Test that nested lists and dicts in JSON strings work.""" - - class NestedAction(Action): - nested_list: list[list[int]] = Field(description="Nested list") - nested_dict: dict[str, dict[str, str]] = Field(description="Nested dict") - data = { "nested_list": "[[1, 2], [3, 4]]", "nested_dict": '{"outer": {"inner": "value"}}', } - fixed_data = fix_malformed_tool_arguments(data, NestedAction) - action = NestedAction.model_validate(fixed_data) + fixed_data = fix_malformed_tool_arguments(data, _NestedActionForMalformedArgs) + action = _NestedActionForMalformedArgs.model_validate(fixed_data) assert action.nested_list == [[1, 2], [3, 4]] assert action.nested_dict == {"outer": {"inner": "value"}} diff --git a/tests/sdk/conversation/local/test_state_serialization.py b/tests/sdk/conversation/local/test_state_serialization.py index eaee970b08..fd6fe2765d 100644 --- a/tests/sdk/conversation/local/test_state_serialization.py +++ b/tests/sdk/conversation/local/test_state_serialization.py @@ -9,11 +9,16 @@ from pydantic import SecretStr, ValidationError from openhands.sdk import Agent, Conversation +from openhands.sdk.agent.base import AgentBase from openhands.sdk.conversation.impl.local_conversation import LocalConversation from openhands.sdk.conversation.state import ( ConversationExecutionStatus, ConversationState, ) +from openhands.sdk.conversation.types import ( + ConversationCallbackType, + ConversationTokenCallbackType, +) from openhands.sdk.event.llm_convertible import MessageEvent, SystemPromptEvent from openhands.sdk.llm import LLM, Message, TextContent from openhands.sdk.llm.llm_registry import RegistryEvent @@ 
-21,6 +26,35 @@ from openhands.sdk.workspace import LocalWorkspace +class _DifferentAgentForVerifyTest(AgentBase): + """A different agent class used to test Agent.verify() rejects class mismatches. + + This class is defined at module level (rather than inside a test function) to + ensure it's importable by Pydantic during serialization/deserialization. + Defining it inside a test function causes test pollution when running tests + in parallel with pytest-xdist. + """ + + def __init__(self): + llm = LLM( + model="gpt-4o-mini", + api_key=SecretStr("test-key"), + usage_id="test-llm", + ) + super().__init__(llm=llm, tools=[]) + + def init_state(self, state, on_event): + pass + + def step( + self, + conversation, + on_event: ConversationCallbackType, + on_token: ConversationTokenCallbackType | None = None, + ): + pass + + def test_conversation_state_basic_serialization(): """Test basic ConversationState serialization and deserialization.""" llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm") @@ -490,35 +524,9 @@ def test_agent_verify_allows_different_llm(): def test_agent_verify_different_class_raises_error(): """Test that agent.verify() raises error for different agent classes.""" - from openhands.sdk.agent.base import AgentBase - from openhands.sdk.conversation.types import ( - ConversationCallbackType, - ConversationTokenCallbackType, - ) - - class DifferentAgent(AgentBase): - def __init__(self): - llm = LLM( - model="gpt-4o-mini", - api_key=SecretStr("test-key"), - usage_id="test-llm", - ) - super().__init__(llm=llm, tools=[]) - - def init_state(self, state, on_event): - pass - - def step( - self, - conversation, - on_event: ConversationCallbackType, - on_token: ConversationTokenCallbackType | None = None, - ): - pass - llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm") original_agent = Agent(llm=llm, tools=[]) - different_agent = DifferentAgent() + different_agent = _DifferentAgentForVerifyTest() with 
pytest.raises(ValueError, match="Cannot load from persisted"): original_agent.verify(different_agent) diff --git a/tests/sdk/conversation/test_visualizer.py b/tests/sdk/conversation/test_visualizer.py index c88544ac1f..373bb01eeb 100644 --- a/tests/sdk/conversation/test_visualizer.py +++ b/tests/sdk/conversation/test_visualizer.py @@ -21,6 +21,8 @@ SystemPromptEvent, UserRejectObservation, ) +from openhands.sdk.event.base import Event +from openhands.sdk.event.types import SourceType from openhands.sdk.llm import ( Message, MessageToolCall, @@ -33,6 +35,18 @@ from openhands.sdk.conversation.impl.local_conversation import LocalConversation +class _UnknownEventForVisualizerTest(Event): + """Unknown event type for testing fallback visualization. + + This class is defined at module level (rather than inside a test function) to + ensure it's importable by Pydantic during serialization/deserialization. + Defining it inside a test function causes test pollution when running tests + in parallel with pytest-xdist. 
+ """ + + source: SourceType = "agent" + + class VisualizerMockAction(Action): """Mock action for testing.""" @@ -457,18 +471,12 @@ def test_metrics_abbreviation_formatting(): def test_event_base_fallback_visualize(): """Test that Event provides fallback visualization.""" - from openhands.sdk.event.base import Event - from openhands.sdk.event.types import SourceType - - class UnknownEvent(Event): - source: SourceType = "agent" - - event = UnknownEvent() + event = _UnknownEventForVisualizerTest() result = event.visualize assert isinstance(result, Text) text_content = result.plain - assert "Unknown event type: UnknownEvent" in text_content + assert "Unknown event type: _UnknownEventForVisualizerTest" in text_content def test_visualizer_conversation_state_update_event_skipped(): diff --git a/tests/sdk/event/test_event_immutability.py b/tests/sdk/event/test_event_immutability.py index 8a231fd92d..4a8ece8742 100644 --- a/tests/sdk/event/test_event_immutability.py +++ b/tests/sdk/event/test_event_immutability.py @@ -75,13 +75,21 @@ def create(cls, *args, **kwargs) -> Sequence[Self]: ] -def test_event_base_is_frozen(): - """Test that Event instances are frozen and cannot be modified.""" +class _TestEventForImmutability(Event): + """Test event class for immutability tests. + + This class is defined at module level (rather than inside a test function) to + ensure it's importable by Pydantic during serialization/deserialization. + Defining it inside a test function causes test pollution when running tests + in parallel with pytest-xdist. 
+ """ - class TestEvent(Event): - test_field: str = "test_value" + test_field: str = "test_value" - event = TestEvent(source="agent", test_field="initial_value") + +def test_event_base_is_frozen(): + """Test that Event instances are frozen and cannot be modified.""" + event = _TestEventForImmutability(source="agent", test_field="initial_value") # Test that we cannot modify any field with pytest.raises(Exception): # Pydantic raises ValidationError for frozen models diff --git a/tests/sdk/llm/test_reasoning_content.py b/tests/sdk/llm/test_reasoning_content.py index 41cedcbfb9..39d6dddb7a 100644 --- a/tests/sdk/llm/test_reasoning_content.py +++ b/tests/sdk/llm/test_reasoning_content.py @@ -2,6 +2,20 @@ from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage +from openhands.sdk.tool import Action + + +class _TestActionForReasoningContent(Action): + """A test action used for testing reasoning content in ActionEvent. + + This class is defined at module level (rather than inside a test function) to + ensure it's importable by Pydantic during serialization/deserialization. + Defining it inside a test function causes test pollution when running tests + in parallel with pytest-xdist. 
+ """ + + action: str = "test" + def create_mock_response(content: str = "Test response", response_id: str = "test-id"): """Helper function to create properly structured mock responses.""" @@ -113,11 +127,6 @@ def test_action_event_with_reasoning_content(): MessageToolCall, TextContent, ) - from openhands.sdk.tool import Action - - # Create a simple action for testing - class TestAction(Action): - action: str = "test" # Create a tool call tool_call = MessageToolCall( @@ -129,7 +138,7 @@ class TestAction(Action): action_event = ActionEvent( thought=[TextContent(text="I need to test this")], - action=TestAction(), + action=_TestActionForReasoningContent(), tool_name="test_tool", tool_call_id="test-id", tool_call=tool_call, diff --git a/tests/sdk/mcp/test_mcp_action_serialization.py b/tests/sdk/mcp/test_mcp_action_serialization.py index 288953b50f..502686470c 100644 --- a/tests/sdk/mcp/test_mcp_action_serialization.py +++ b/tests/sdk/mcp/test_mcp_action_serialization.py @@ -4,6 +4,18 @@ from openhands.sdk.mcp import MCPToolAction +class _ChildMCPToolActionForSerialization(MCPToolAction): + """Child MCP action for testing declared fields with data. + + This class is defined at module level (rather than inside a test function) to + ensure it's importable by Pydantic during serialization/deserialization. + Defining it inside a test function causes test pollution when running tests + in parallel with pytest-xdist. 
+ """ + + declared: int + + def test_data_field_emerges_from_to_mcp_arguments(): """Test that data field contents are returned by to_mcp_arguments.""" data = {"new_field": "value", "dynamic": 123} @@ -18,12 +30,8 @@ def test_data_field_emerges_from_to_mcp_arguments(): def test_declared_child_fields_with_data(): """Test that child classes work with the data field.""" - - class Child(MCPToolAction): - declared: int - data = {"tool_param": "value"} - a = Child(declared=7, data=data) + a = _ChildMCPToolActionForSerialization(declared=7, data=data) out = a.to_mcp_arguments() # Only data field contents should be in MCP arguments diff --git a/tests/sdk/tool/test_schema_immutability.py b/tests/sdk/tool/test_schema_immutability.py index d6df9d129a..eff2aca134 100644 --- a/tests/sdk/tool/test_schema_immutability.py +++ b/tests/sdk/tool/test_schema_immutability.py @@ -53,6 +53,34 @@ def to_llm_content(self) -> Sequence[TextContent | ImageContent]: return [TextContent(text=f"Result: {self.result}, Status: {self.status}")] +class _SchemaImmutabilityCustomAction(Action): + """Custom action for testing schema inheritance immutability. + + This class is defined at module level (rather than inside a test function) to + ensure it's importable by Pydantic during serialization/deserialization. + Defining it inside a test function causes test pollution when running tests + in parallel with pytest-xdist. + """ + + custom_field: str = Field(description="Custom field") + + +class _SchemaImmutabilityCustomObservation(Observation): + """Custom observation for testing schema inheritance immutability. + + This class is defined at module level (rather than inside a test function) to + ensure it's importable by Pydantic during serialization/deserialization. + Defining it inside a test function causes test pollution when running tests + in parallel with pytest-xdist. 
+ """ + + custom_result: str = Field(description="Custom result") + + @property + def to_llm_content(self) -> Sequence[TextContent | ImageContent]: + return [TextContent(text=self.custom_result)] + + def test_schema_is_frozen(): """Test that Schema instances are frozen and cannot be modified.""" schema = MockSchema(name="test", value=42) @@ -273,22 +301,11 @@ def test_all_schema_classes_are_frozen(): def test_schema_inheritance_preserves_immutability(): """Test that classes inheriting from schema bases are also immutable.""" - - class SchemaImmutabilityCustomAction(Action): - custom_field: str = Field(description="Custom field") - - class SchemaImmutabilityCustomObservation(Observation): - custom_result: str = Field(description="Custom result") - - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - return [TextContent(text=self.custom_result)] - # Test that custom classes are also frozen - custom_action = SchemaImmutabilityCustomAction(custom_field="test") + custom_action = _SchemaImmutabilityCustomAction(custom_field="test") with pytest.raises(ValidationError, match="Instance is frozen"): custom_action.custom_field = "changed" - custom_obs = SchemaImmutabilityCustomObservation(custom_result="test") + custom_obs = _SchemaImmutabilityCustomObservation(custom_result="test") with pytest.raises(ValidationError, match="Instance is frozen"): custom_obs.custom_result = "changed"