Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ jobs:
run: |
# Clean up any existing coverage file
rm -f .coverage
# Use pytest-xdist (-n auto) to run tests in parallel while still collecting
# coverage properly. --forked prevents coverage data from being collected from
# child processes.
CI=true uv run python -m pytest -vvs \
--forked \
-n auto \
--cov=openhands-sdk \
--cov-report=term-missing \
--cov-fail-under=0 \
Expand Down Expand Up @@ -112,6 +114,8 @@ jobs:
run: |
# Clean up any existing coverage file
rm -f .coverage
# Use --forked for tools tests due to terminal test conflicts
# when running in parallel (shared /tmp paths, subprocess management)
CI=true uv run python -m pytest -vvs \
--forked \
--cov=openhands-tools \
Expand Down Expand Up @@ -166,8 +170,10 @@ jobs:
run: |
# Clean up any existing coverage file
rm -f .coverage
# Use pytest-xdist (-n auto) to run tests in parallel while still collecting
# coverage properly. --forked prevents coverage data from being collected from
# child processes.
CI=true uv run python -m pytest -vvs \
--forked \
-n auto \
--cov=openhands-agent-server \
--cov-report=term-missing \
--cov-fail-under=0 \
Expand Down
22 changes: 15 additions & 7 deletions tests/sdk/agent/test_fix_malformed_tool_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@ class JsonDecodingOptionalAction(Action):
config: dict[str, int] | None = Field(default=None, description="Optional dict")


class _NestedActionForMalformedArgs(Action):
    """Action whose fields are nested containers, used by the nested-structure test.

    The test feeds JSON-encoded strings for both fields through
    ``fix_malformed_tool_arguments`` and then validates the result against
    this model.

    This class is defined at module level (rather than inside a test function) to
    ensure it's importable by Pydantic during serialization/deserialization.
    Defining it inside a test function causes test pollution when running tests
    in parallel with pytest-xdist.
    """

    # List of integer lists, e.g. [[1, 2], [3, 4]].
    nested_list: list[list[int]] = Field(description="Nested list")
    # Two-level string mapping, e.g. {"outer": {"inner": "value"}}.
    nested_dict: dict[str, dict[str, str]] = Field(description="Nested dict")


def test_decode_json_string_list():
"""Test that JSON string lists are decoded to native lists."""
data = {
Expand Down Expand Up @@ -201,17 +214,12 @@ def test_json_string_with_wrong_type_rejected():

def test_nested_structures():
"""Test that nested lists and dicts in JSON strings work."""

class NestedAction(Action):
nested_list: list[list[int]] = Field(description="Nested list")
nested_dict: dict[str, dict[str, str]] = Field(description="Nested dict")

data = {
"nested_list": "[[1, 2], [3, 4]]",
"nested_dict": '{"outer": {"inner": "value"}}',
}
fixed_data = fix_malformed_tool_arguments(data, NestedAction)
action = NestedAction.model_validate(fixed_data)
fixed_data = fix_malformed_tool_arguments(data, _NestedActionForMalformedArgs)
action = _NestedActionForMalformedArgs.model_validate(fixed_data)

assert action.nested_list == [[1, 2], [3, 4]]
assert action.nested_dict == {"outer": {"inner": "value"}}
Expand Down
62 changes: 35 additions & 27 deletions tests/sdk/conversation/local/test_state_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,52 @@
from pydantic import SecretStr, ValidationError

from openhands.sdk import Agent, Conversation
from openhands.sdk.agent.base import AgentBase
from openhands.sdk.conversation.impl.local_conversation import LocalConversation
from openhands.sdk.conversation.state import (
ConversationExecutionStatus,
ConversationState,
)
from openhands.sdk.conversation.types import (
ConversationCallbackType,
ConversationTokenCallbackType,
)
from openhands.sdk.event.llm_convertible import MessageEvent, SystemPromptEvent
from openhands.sdk.llm import LLM, Message, TextContent
from openhands.sdk.llm.llm_registry import RegistryEvent
from openhands.sdk.security.confirmation_policy import AlwaysConfirm
from openhands.sdk.workspace import LocalWorkspace


class _DifferentAgentForVerifyTest(AgentBase):
    """Stand-in agent class that is deliberately not ``Agent``.

    The verify test passes an instance of this class to ``Agent.verify()`` and
    expects a ``ValueError`` because the agent classes do not match.

    It lives at module scope instead of inside the test function so that
    Pydantic can import it during serialization/deserialization; a
    function-local definition causes test pollution when the suite runs in
    parallel under pytest-xdist.
    """

    def __init__(self):
        """Build the agent with a fixed test LLM and no tools."""
        super().__init__(
            llm=LLM(
                model="gpt-4o-mini",
                api_key=SecretStr("test-key"),
                usage_id="test-llm",
            ),
            tools=[],
        )

    def init_state(self, state, on_event):
        """Do nothing; this agent only needs to exist for the class check."""
        return None

    def step(
        self,
        conversation,
        on_event: ConversationCallbackType,
        on_token: ConversationTokenCallbackType | None = None,
    ):
        """Do nothing; the agent is never actually run by the test."""
        return None


def test_conversation_state_basic_serialization():
"""Test basic ConversationState serialization and deserialization."""
llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")
Expand Down Expand Up @@ -490,35 +524,9 @@ def test_agent_verify_allows_different_llm():

def test_agent_verify_different_class_raises_error():
"""Test that agent.verify() raises error for different agent classes."""
from openhands.sdk.agent.base import AgentBase
from openhands.sdk.conversation.types import (
ConversationCallbackType,
ConversationTokenCallbackType,
)

class DifferentAgent(AgentBase):
def __init__(self):
llm = LLM(
model="gpt-4o-mini",
api_key=SecretStr("test-key"),
usage_id="test-llm",
)
super().__init__(llm=llm, tools=[])

def init_state(self, state, on_event):
pass

def step(
self,
conversation,
on_event: ConversationCallbackType,
on_token: ConversationTokenCallbackType | None = None,
):
pass

llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")
original_agent = Agent(llm=llm, tools=[])
different_agent = DifferentAgent()
different_agent = _DifferentAgentForVerifyTest()

with pytest.raises(ValueError, match="Cannot load from persisted"):
original_agent.verify(different_agent)
Expand Down
24 changes: 16 additions & 8 deletions tests/sdk/conversation/test_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
SystemPromptEvent,
UserRejectObservation,
)
from openhands.sdk.event.base import Event
from openhands.sdk.event.types import SourceType
from openhands.sdk.llm import (
Message,
MessageToolCall,
Expand All @@ -33,6 +35,18 @@
from openhands.sdk.conversation.impl.local_conversation import LocalConversation


class _UnknownEventForVisualizerTest(Event):
    """Event subclass used by the fallback-visualization test.

    The fallback-visualization test instantiates it and checks that
    ``visualize`` reports "Unknown event type: <class name>".

    This class is defined at module level (rather than inside a test function) to
    ensure it's importable by Pydantic during serialization/deserialization.
    Defining it inside a test function causes test pollution when running tests
    in parallel with pytest-xdist.
    """

    # Default value lets the test construct the event with no arguments.
    source: SourceType = "agent"


class VisualizerMockAction(Action):
"""Mock action for testing."""

Expand Down Expand Up @@ -457,18 +471,12 @@ def test_metrics_abbreviation_formatting():

def test_event_base_fallback_visualize():
"""Test that Event provides fallback visualization."""
from openhands.sdk.event.base import Event
from openhands.sdk.event.types import SourceType

class UnknownEvent(Event):
source: SourceType = "agent"

event = UnknownEvent()
event = _UnknownEventForVisualizerTest()
result = event.visualize
assert isinstance(result, Text)

text_content = result.plain
assert "Unknown event type: UnknownEvent" in text_content
assert "Unknown event type: _UnknownEventForVisualizerTest" in text_content


def test_visualizer_conversation_state_update_event_skipped():
Expand Down
18 changes: 13 additions & 5 deletions tests/sdk/event/test_event_immutability.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,21 @@ def create(cls, *args, **kwargs) -> Sequence[Self]:
]


def test_event_base_is_frozen():
"""Test that Event instances are frozen and cannot be modified."""
class _TestEventForImmutability(Event):
"""Test event class for immutability tests.

This class is defined at module level (rather than inside a test function) to
ensure it's importable by Pydantic during serialization/deserialization.
Defining it inside a test function causes test pollution when running tests
in parallel with pytest-xdist.
"""

class TestEvent(Event):
test_field: str = "test_value"
test_field: str = "test_value"

event = TestEvent(source="agent", test_field="initial_value")

def test_event_base_is_frozen():
"""Test that Event instances are frozen and cannot be modified."""
event = _TestEventForImmutability(source="agent", test_field="initial_value")

# Test that we cannot modify any field
with pytest.raises(Exception): # Pydantic raises ValidationError for frozen models
Expand Down
21 changes: 15 additions & 6 deletions tests/sdk/llm/test_reasoning_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@

from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage

from openhands.sdk.tool import Action


class _TestActionForReasoningContent(Action):
    """Minimal action passed as the ``action`` of an ``ActionEvent`` in tests.

    It exists only so the reasoning-content test has a concrete ``Action``
    instance to attach to the event.

    This class is defined at module level (rather than inside a test function) to
    ensure it's importable by Pydantic during serialization/deserialization.
    Defining it inside a test function causes test pollution when running tests
    in parallel with pytest-xdist.
    """

    # Defaulted so the test can instantiate the action with no arguments.
    action: str = "test"


def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
"""Helper function to create properly structured mock responses."""
Expand Down Expand Up @@ -113,11 +127,6 @@ def test_action_event_with_reasoning_content():
MessageToolCall,
TextContent,
)
from openhands.sdk.tool import Action

# Create a simple action for testing
class TestAction(Action):
action: str = "test"

# Create a tool call
tool_call = MessageToolCall(
Expand All @@ -129,7 +138,7 @@ class TestAction(Action):

action_event = ActionEvent(
thought=[TextContent(text="I need to test this")],
action=TestAction(),
action=_TestActionForReasoningContent(),
tool_name="test_tool",
tool_call_id="test-id",
tool_call=tool_call,
Expand Down
18 changes: 13 additions & 5 deletions tests/sdk/mcp/test_mcp_action_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@
from openhands.sdk.mcp import MCPToolAction


class _ChildMCPToolActionForSerialization(MCPToolAction):
    """``MCPToolAction`` subclass that adds one explicitly declared field.

    Used to check how a child class with its own declared field behaves
    together with the inherited ``data`` field when ``to_mcp_arguments()``
    is called.

    This class is defined at module level (rather than inside a test function) to
    ensure it's importable by Pydantic during serialization/deserialization.
    Defining it inside a test function causes test pollution when running tests
    in parallel with pytest-xdist.
    """

    # Required field declared on the child class (no default).
    declared: int


def test_data_field_emerges_from_to_mcp_arguments():
"""Test that data field contents are returned by to_mcp_arguments."""
data = {"new_field": "value", "dynamic": 123}
Expand All @@ -18,12 +30,8 @@ def test_data_field_emerges_from_to_mcp_arguments():

def test_declared_child_fields_with_data():
"""Test that child classes work with the data field."""

class Child(MCPToolAction):
declared: int

data = {"tool_param": "value"}
a = Child(declared=7, data=data)
a = _ChildMCPToolActionForSerialization(declared=7, data=data)
out = a.to_mcp_arguments()

# Only data field contents should be in MCP arguments
Expand Down
43 changes: 30 additions & 13 deletions tests/sdk/tool/test_schema_immutability.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,34 @@ def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
return [TextContent(text=f"Result: {self.result}, Status: {self.status}")]


class _SchemaImmutabilityCustomAction(Action):
    """``Action`` subclass used to check that subclasses are frozen too.

    The inheritance test builds an instance and expects assigning to
    ``custom_field`` to raise a ``ValidationError`` ("Instance is frozen").

    This class is defined at module level (rather than inside a test function) to
    ensure it's importable by Pydantic during serialization/deserialization.
    Defining it inside a test function causes test pollution when running tests
    in parallel with pytest-xdist.
    """

    # Required string field; the test tries (and must fail) to reassign it.
    custom_field: str = Field(description="Custom field")


class _SchemaImmutabilityCustomObservation(Observation):
    """``Observation`` subclass used to check that subclasses are frozen too.

    The inheritance test builds an instance and expects assigning to
    ``custom_result`` to raise a ``ValidationError`` ("Instance is frozen").

    It lives at module scope instead of inside the test function so that
    Pydantic can import it during serialization/deserialization; a
    function-local definition causes test pollution when the suite runs in
    parallel under pytest-xdist.
    """

    custom_result: str = Field(description="Custom result")

    @property
    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
        """Return ``custom_result`` wrapped in a single ``TextContent`` item."""
        text_item = TextContent(text=self.custom_result)
        return [text_item]


def test_schema_is_frozen():
"""Test that Schema instances are frozen and cannot be modified."""
schema = MockSchema(name="test", value=42)
Expand Down Expand Up @@ -273,22 +301,11 @@ def test_all_schema_classes_are_frozen():

def test_schema_inheritance_preserves_immutability():
"""Test that classes inheriting from schema bases are also immutable."""

class SchemaImmutabilityCustomAction(Action):
custom_field: str = Field(description="Custom field")

class SchemaImmutabilityCustomObservation(Observation):
custom_result: str = Field(description="Custom result")

@property
def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
return [TextContent(text=self.custom_result)]

# Test that custom classes are also frozen
custom_action = SchemaImmutabilityCustomAction(custom_field="test")
custom_action = _SchemaImmutabilityCustomAction(custom_field="test")
with pytest.raises(ValidationError, match="Instance is frozen"):
custom_action.custom_field = "changed"

custom_obs = SchemaImmutabilityCustomObservation(custom_result="test")
custom_obs = _SchemaImmutabilityCustomObservation(custom_result="test")
with pytest.raises(ValidationError, match="Instance is frozen"):
custom_obs.custom_result = "changed"
Loading