Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions openhands-sdk/openhands/sdk/agent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,13 @@ def verify(
if isinstance(event, ActionEvent) and event.tool_name
}

# Add builtin tool names from include_default_tools
# These are runtime names like 'finish', 'think'
for tool_class_name in self.include_default_tools:
tool_class = BUILT_IN_TOOL_CLASSES.get(tool_class_name)
if tool_class is not None:
runtime_names.add(tool_class.name)
Comment on lines +387 to +392
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Suggestion: The fix is correct, but consider adding a comment about naming consistency.

The runtime_names set now contains a mix of:

  • Tool spec class names like "TerminalTool" (from line 372)
  • Builtin runtime names like "finish", "think" (added here)

This works because events only contain runtime names, but it's conceptually inconsistent. A clarifying comment would help future maintainers:

Suggested change
# Add builtin tool names from include_default_tools
# These are runtime names like 'finish', 'think'
for tool_class_name in self.include_default_tools:
tool_class = BUILT_IN_TOOL_CLASSES.get(tool_class_name)
if tool_class is not None:
runtime_names.add(tool_class.name)
# Add builtin tool names from include_default_tools
# These are runtime names like 'finish', 'think'
# Note: runtime_names now contains a mix of Tool spec class names
# (e.g. "TerminalTool") and builtin runtime names (e.g. "finish").
# This works because events only reference runtime names.
for tool_class_name in self.include_default_tools:
tool_class = BUILT_IN_TOOL_CLASSES.get(tool_class_name)
if tool_class is not None:
runtime_names.add(tool_class.name)

Comment on lines +387 to +392
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Suggestion: The fix is correct, but consider adding a comment about naming consistency.

The runtime_names set now contains a mix of:

  • Tool spec class names like "TerminalTool" (from line 372)
  • Builtin runtime names like "finish", "think" (added here)

This works because events only contain runtime names, but it's conceptually inconsistent. A clarifying comment would help future maintainers:

Suggested change
# Add builtin tool names from include_default_tools
# These are runtime names like 'finish', 'think'
for tool_class_name in self.include_default_tools:
tool_class = BUILT_IN_TOOL_CLASSES.get(tool_class_name)
if tool_class is not None:
runtime_names.add(tool_class.name)
# Add builtin tool names from include_default_tools
# These are runtime names like 'finish', 'think'
# Note: runtime_names now contains a mix of Tool spec class names
# (e.g. "TerminalTool") and builtin runtime names (e.g. "finish").
# This works because events only reference runtime names.
for tool_class_name in self.include_default_tools:
tool_class = BUILT_IN_TOOL_CLASSES.get(tool_class_name)
if tool_class is not None:
runtime_names.add(tool_class.name)


# Only require tools that were actually used in history.
missing_used_tools = used_tools - runtime_names
if missing_used_tools:
Expand Down
147 changes: 147 additions & 0 deletions tests/sdk/conversation/local/test_state_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,3 +1165,150 @@ def test_conversation_state_cipher_mismatch():
"API_KEY"
].get_value()
assert api_key_value is None


def test_agent_verify_builtin_tools_included_in_check():
"""Test that verify() correctly includes builtin tools in its check.

Builtin tools configured via include_default_tools are now correctly
recognized by their runtime names (e.g., 'finish', 'think') when
checking against event history.
"""
from openhands.sdk.agent import AgentBase
from openhands.sdk.event import ActionEvent
from openhands.sdk.llm import MessageToolCall
from openhands.sdk.tool import Tool

llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")

# Create persisted agent with TerminalTool
persisted_agent_obj = Agent(
llm=llm,
tools=[Tool(name="TerminalTool")],
include_default_tools=["FinishTool"],
)

# Events from the session - only 'finish' was used (a builtin tool)
events_with_finish = [
ActionEvent(
source="agent",
thought=[],
tool_name="finish", # Runtime name, NOT 'FinishTool'
tool_call_id="call_123",
tool_call=MessageToolCall(
id="call_123",
name="finish",
arguments='{"message": "Done!"}',
origin="completion",
),
llm_response_id="resp_123",
),
]

# Serialize and deserialize to simulate loading from persistence
serialized = persisted_agent_obj.model_dump_json()
persisted_agent = AgentBase.model_validate_json(serialized)

# Create a runtime agent with DIFFERENT tools (FileEditorTool instead of
# TerminalTool) but still including FinishTool builtin
runtime_agent = Agent(
llm=llm,
tools=[Tool(name="FileEditorTool")], # Different from persisted!
include_default_tools=["FinishTool"],
)

# This should PASS since 'finish' is a builtin tool available via
# include_default_tools. The verify method adds builtin tool runtime names
# to the check.
result = runtime_agent.verify(persisted_agent, events=events_with_finish)
assert result is runtime_agent

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: Missing test coverage for the early return bug.

All three tests have different tool specs between persisted and runtime agents (TerminalTool vs FileEditorTool), so they never trigger the early return at line 375-376 of base.py.

Missing scenario that exposes the bug:

  • Both agents have SAME tools (e.g., both have TerminalTool)
  • But different include_default_tools (one has FinishTool, other doesn't)
  • Events show a builtin tool was used

This case currently has a bug where verification incorrectly passes (see review body).

Suggested additional test:

def test_agent_verify_same_tools_different_builtins_fails():
    """Test that verify fails when tools match but required builtin is missing."""
    llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")
    
    # Persisted agent has FinishTool
    persisted_agent_obj = Agent(
        llm=llm,
        tools=[Tool(name="TerminalTool")],
        include_default_tools=["FinishTool"],
    )
    
    # Events show 'finish' was used
    events_with_finish = [
        ActionEvent(
            source="agent",
            thought=[],
            tool_name="finish",
            tool_call_id="call_123",
            tool_call=MessageToolCall(
                id="call_123",
                name="finish",
                arguments='{"message": "Done!"}',
                origin="completion",
            ),
            llm_response_id="resp_123",
        ),
    ]
    
    serialized = persisted_agent_obj.model_dump_json()
    persisted_agent = AgentBase.model_validate_json(serialized)
    
    # Runtime agent MISSING FinishTool
    runtime_agent = Agent(
        llm=llm,
        tools=[Tool(name="TerminalTool")],  # Same tool!
        include_default_tools=[],  # Missing FinishTool!
    )
    
    # Should fail but currently passes due to early return bug
    with pytest.raises(ValueError, match="missing from runtime.*finish"):
        runtime_agent.verify(persisted_agent, events=events_with_finish)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: Missing test coverage for the early return bug.

All three tests have different tool specs between persisted and runtime agents (TerminalTool vs FileEditorTool), so they never trigger the early return at line 375-376 of base.py.

Missing scenario that exposes the bug:

  • Both agents have SAME tools (e.g., both have TerminalTool)
  • But different include_default_tools (one has FinishTool, other doesn't)
  • Events show a builtin tool was used

This case currently has a bug where verification incorrectly passes (see review body).

Suggested additional test:

def test_agent_verify_same_tools_different_builtins_fails():
    """Test that verify fails when tools match but required builtin is missing."""
    llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")
    
    # Persisted agent has FinishTool
    persisted_agent_obj = Agent(
        llm=llm,
        tools=[Tool(name="TerminalTool")],
        include_default_tools=["FinishTool"],
    )
    
    # Events show 'finish' was used
    events_with_finish = [
        ActionEvent(
            source="agent",
            thought=[],
            tool_name="finish",
            tool_call_id="call_123",
            tool_call=MessageToolCall(
                id="call_123",
                name="finish",
                arguments='{"message": "Done!"}',
                origin="completion",
            ),
            llm_response_id="resp_123",
        ),
    ]
    
    serialized = persisted_agent_obj.model_dump_json()
    persisted_agent = AgentBase.model_validate_json(serialized)
    
    # Runtime agent MISSING FinishTool
    runtime_agent = Agent(
        llm=llm,
        tools=[Tool(name="TerminalTool")],  # Same tool!
        include_default_tools=[],  # Missing FinishTool!
    )
    
    # Should fail but currently passes due to early return bug
    with pytest.raises(ValueError, match="missing from runtime.*finish"):
        runtime_agent.verify(persisted_agent, events=events_with_finish)


def test_agent_verify_think_builtin_tool_included():
"""Test that 'think' builtin tool is correctly included in event check."""
from openhands.sdk.agent import AgentBase
from openhands.sdk.event import ActionEvent
from openhands.sdk.llm import MessageToolCall
from openhands.sdk.tool import Tool

llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")

persisted_agent_obj = Agent(
llm=llm,
tools=[Tool(name="TerminalTool")],
include_default_tools=["ThinkTool"],
)

events_with_think = [
ActionEvent(
source="agent",
thought=[],
tool_name="think",
tool_call_id="call_123",
tool_call=MessageToolCall(
id="call_123",
name="think",
arguments='{"thought": "Let me think..."}',
origin="completion",
),
llm_response_id="resp_123",
),
]

serialized = persisted_agent_obj.model_dump_json()
persisted_agent = AgentBase.model_validate_json(serialized)

runtime_agent = Agent(
llm=llm,
tools=[Tool(name="FileEditorTool")],
include_default_tools=["ThinkTool"],
)

result = runtime_agent.verify(persisted_agent, events=events_with_think)
assert result is runtime_agent


def test_agent_verify_missing_builtin_tool_fails():
"""Test that verify fails when a used builtin tool is not configured."""
from openhands.sdk.agent import AgentBase
from openhands.sdk.event import ActionEvent
from openhands.sdk.llm import MessageToolCall
from openhands.sdk.tool import Tool

llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")

persisted_agent_obj = Agent(
llm=llm,
tools=[Tool(name="TerminalTool")],
include_default_tools=["FinishTool"], # Has FinishTool
)

events_with_finish = [
ActionEvent(
source="agent",
thought=[],
tool_name="finish",
tool_call_id="call_123",
tool_call=MessageToolCall(
id="call_123",
name="finish",
arguments='{"message": "Done!"}',
origin="completion",
),
llm_response_id="resp_123",
),
]

serialized = persisted_agent_obj.model_dump_json()
persisted_agent = AgentBase.model_validate_json(serialized)

# Runtime agent does NOT have FinishTool in include_default_tools
runtime_agent = Agent(
llm=llm,
tools=[Tool(name="FileEditorTool")],
include_default_tools=[], # No FinishTool!
)

# Should fail because 'finish' was used but FinishTool is not configured
with pytest.raises(ValueError, match="missing from runtime.*finish"):
runtime_agent.verify(persisted_agent, events=events_with_finish)
Loading