Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
82b191c
refactor: remove reconciliation methods, use runtime agent directly
enyst Dec 29, 2025
a1d7101
feat: add agent.load() method to validate tools match on restore
enyst Dec 31, 2025
8a7c73d
Update openhands-sdk/openhands/sdk/conversation/state.py
enyst Dec 31, 2025
730de13
Update openhands-sdk/openhands/sdk/agent/base.py
enyst Dec 31, 2025
16c6999
refactor: rename load() to verify() for clarity
enyst Dec 31, 2025
174094b
refactor: use persisted state directly, update agent field
enyst Dec 31, 2025
8263551
fix: override runtime-provided values on resume
enyst Dec 31, 2025
425828a
fix: keep stuck_detection from persisted state
enyst Dec 31, 2025
0f6bfc2
test: add comprehensive tests for resume behavior
enyst Dec 31, 2025
abb44f1
Merge branch 'main' into openhands/remove-reconciliation-methods
enyst Dec 31, 2025
e55581f
chore: remove unused LLM OVERRIDE_ON_SERIALIZE
enyst Dec 31, 2025
5d7afb3
docs: clarify ConversationState.workspace is a workspace object
enyst Dec 31, 2025
7b37817
Merge branch 'main' into openhands/remove-reconciliation-methods
enyst Jan 3, 2026
027dce6
Merge branch 'main' into openhands/remove-reconciliation-methods
enyst Jan 3, 2026
6b90933
Update openhands-sdk/openhands/sdk/conversation/state.py
enyst Jan 3, 2026
bf6382a
Update openhands-sdk/openhands/sdk/conversation/state.py
enyst Jan 3, 2026
5f2a32c
test: rename reconciliation tests to loading
enyst Jan 3, 2026
0ef84cf
test: cover secret masking in LLM json dumps
enyst Jan 3, 2026
05cdbff
merge main into openhands/remove-reconciliation-methods
enyst Jan 5, 2026
43ae5f5
test: cover resume overrides agent/LLM while preserving state settings
enyst Jan 5, 2026
9553377
test(integration): add restore conversation test
enyst Jan 5, 2026
d5018f4
test(integration): make t10_restore_conversation runnable without LLM…
enyst Jan 5, 2026
1bc6797
Update tests/integration/tests/t10_restore_conversation.py
enyst Jan 5, 2026
62b5f03
Revert "test(integration): make t10_restore_conversation runnable wit…
enyst Jan 5, 2026
8c59ef0
test(integration): persist t10 conversations under repo outputs dir
enyst Jan 5, 2026
9e25d61
test(integration): ensure restore test persists user history events
enyst Jan 5, 2026
fe67cc6
test(integration): assert restored conversation preserves event count
enyst Jan 5, 2026
07da0d8
test(integration): send follow-up message after restore
enyst Jan 5, 2026
3f1bf6e
Update openhands-sdk/openhands/sdk/agent/base.py
enyst Jan 5, 2026
253803b
refactor: simplify tool mismatch errors in AgentBase.verify
enyst Jan 5, 2026
f020d00
Update openhands-sdk/openhands/sdk/conversation/state.py
enyst Jan 5, 2026
a001f4e
Update tests/cross/test_agent_loading.py
enyst Jan 5, 2026
ea525f2
Update tests/cross/test_agent_loading.py
enyst Jan 5, 2026
a034025
Update tests/cross/test_agent_loading.py
enyst Jan 5, 2026
897d4c0
Update tests/integration/tests/t10_restore_conversation.py
enyst Jan 5, 2026
cc8d500
test(integration): use ERROR status for restore + avoid brittle statu…
enyst Jan 5, 2026
64e46ed
test(integration): assert confirmation policy + finished status after…
enyst Jan 5, 2026
bb8c84d
test(integration): set ERROR before follow-up run
enyst Jan 5, 2026
1b9f9d5
Update openhands-sdk/openhands/sdk/conversation/state.py
enyst Jan 5, 2026
c06da9c
Update tests/integration/tests/t10_restore_conversation.py
enyst Jan 5, 2026
5927553
test(integration): verify llm1 persisted fields in base_state
enyst Jan 5, 2026
142c4ca
Merge branch 'main' into openhands/remove-reconciliation-methods
enyst Jan 5, 2026
90ff68d
merge main into openhands/remove-reconciliation-methods
enyst Jan 6, 2026
6eabe87
Update tests/sdk/conversation/local/test_state_serialization.py
enyst Jan 6, 2026
d4948ce
test: assert persisted stats fields on resume
enyst Jan 6, 2026
ececa9e
test: add explicit asserts/comments for resumed stats
enyst Jan 6, 2026
78558d0
test: add failure message for context_window assert
enyst Jan 6, 2026
99d3e7d
test: dedup resumed stats asserts while keeping messages
enyst Jan 6, 2026
50ae74d
Update tests/sdk/conversation/local/test_state_serialization.py
enyst Jan 6, 2026
7d54ca6
test(cross): cover resume failure when agent type changes
enyst Jan 6, 2026
ede04b7
test(cross): use top-level pytest import
enyst Jan 6, 2026
8e12188
test(integration): run t10 restore once with 2 LLM configs
enyst Jan 6, 2026
a2aa1b9
fix(integration): skip t10 only for integration runs; read restore co…
enyst Jan 6, 2026
42fa809
Revert "fix(integration): skip t10 only for integration runs; read re…
enyst Jan 6, 2026
1734d51
Revert "test(integration): run t10 restore once with 2 LLM configs"
enyst Jan 6, 2026
f4b52ca
test(integration): remove t10 restore behavior test (moved to split P…
enyst Jan 6, 2026
b49f1ef
Merge branch 'main' into openhands/remove-reconciliation-methods
xingyaoww Jan 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 48 additions & 73 deletions openhands-sdk/openhands/sdk/agent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr

from openhands.sdk.context.agent_context import AgentContext
from openhands.sdk.context.condenser import CondenserBase, LLMSummarizingCondenser
from openhands.sdk.context.condenser import CondenserBase
from openhands.sdk.context.prompts.prompt import render_template
from openhands.sdk.llm import LLM
from openhands.sdk.llm.utils.model_prompt_spec import get_model_prompt_spec
from openhands.sdk.logger import get_logger
from openhands.sdk.mcp import create_mcp_tools
from openhands.sdk.tool import BUILT_IN_TOOLS, Tool, ToolDefinition, resolve_tool
from openhands.sdk.utils.models import DiscriminatedUnionMixin
from openhands.sdk.utils.pydantic_diff import pretty_pydantic_diff


if TYPE_CHECKING:
Expand Down Expand Up @@ -300,64 +299,52 @@ def step(
NOTE: state will be mutated in-place.
"""

def resolve_diff_from_deserialized(
def verify(
self,
persisted: "AgentBase",
events: "Sequence[Any] | None" = None,
) -> "AgentBase":
"""
Return a new AgentBase instance equivalent to `persisted` but with
explicitly whitelisted fields (e.g. api_key) taken from `self`.
"""Verify that we can resume this agent from persisted state.

This method does *not* reconcile configuration between the persisted and
runtime Agent instances. Instead, it verifies compatibility requirements
and, if they hold, the caller continues with the runtime-provided Agent.

Compatibility requirements:
- Agent class/type must match.
- Tools:
- If events are provided, only tools that were actually used in history
must exist in runtime.
- If events are not provided, tool names must match exactly.

All other configuration (LLM, agent_context, condenser, system prompts,
etc.) can be freely changed between sessions.

Args:
persisted: The persisted agent from the conversation state.
events: Optional event sequence to scan for used tools if tool
names don't match. Only scanned when needed (O(n) fallback).
persisted: The agent loaded from persisted state.
events: Optional event sequence to scan for used tools if tool names
don't match.

Returns:
This runtime agent (self) if verification passes.

Raises:
ValueError: If agent class or tools don't match.
"""
if persisted.__class__ is not self.__class__:
raise ValueError(
f"Cannot resolve from deserialized: persisted agent is of type "
"Cannot load from persisted: persisted agent is of type "
f"{persisted.__class__.__name__}, but self is of type "
f"{self.__class__.__name__}."
)

# Get all LLMs from both self and persisted to reconcile them
new_llm = self.llm.resolve_diff_from_deserialized(persisted.llm)
updates: dict[str, Any] = {"llm": new_llm}

# Reconcile the condenser's LLM if it exists
if self.condenser is not None and persisted.condenser is not None:
# Check if both condensers are LLMSummarizingCondenser
# (which has an llm field)

if isinstance(self.condenser, LLMSummarizingCondenser) and isinstance(
persisted.condenser, LLMSummarizingCondenser
):
new_condenser_llm = self.condenser.llm.resolve_diff_from_deserialized(
persisted.condenser.llm
)
new_condenser = persisted.condenser.model_copy(
update={"llm": new_condenser_llm}
)
updates["condenser"] = new_condenser

# Reconcile agent_context - always use the current environment's agent_context
# This allows resuming conversations from different directories and handles
# cases where skills, working directory, or other context has changed
if self.agent_context is not None:
updates["agent_context"] = self.agent_context

# Get tool names for comparison
runtime_names = {tool.name for tool in self.tools}
persisted_names = {tool.name for tool in persisted.tools}

# If tool names match exactly, no need to check event history
if runtime_names == persisted_names:
# Tools unchanged, proceed normally
pass
elif events is not None:
# Tool names differ - scan events to find which tools were actually used
# This is O(n) but only happens when tools change
return self

if events is not None:
from openhands.sdk.event import ActionEvent

used_tools = {
Expand All @@ -366,43 +353,31 @@ def resolve_diff_from_deserialized(
if isinstance(event, ActionEvent) and event.tool_name
}

# Only require tools that were actually used in history
# Only require tools that were actually used in history.
missing_used_tools = used_tools - runtime_names
if missing_used_tools:
raise ValueError(
f"Cannot resume conversation: tools that were used in history "
"Cannot resume conversation: tools that were used in history "
f"are missing from runtime: {sorted(missing_used_tools)}. "
f"Available tools: {sorted(runtime_names)}"
)
# Update tools to match runtime (allows new tools to be added)
updates["tools"] = self.tools
else:
# No events provided - strict matching (legacy behavior)
missing_in_runtime = persisted_names - runtime_names
missing_in_persisted = runtime_names - persisted_names
error_msg = "Tools don't match between runtime and persisted agents."
if missing_in_runtime:
error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}."
if missing_in_persisted:
error_msg += f" Missing in persisted: {sorted(missing_in_persisted)}."
raise ValueError(error_msg)

reconciled = persisted.model_copy(update=updates)

# Validate agent equality - exclude tools from comparison since we
# already validated tool requirements above
exclude_fields = {"tools"} if events is not None else set()
self_dump = self.model_dump(exclude_none=True, exclude=exclude_fields)
reconciled_dump = reconciled.model_dump(
exclude_none=True, exclude=exclude_fields
)

if self_dump != reconciled_dump:
raise ValueError(
"The Agent provided is different from the one in persisted state.\n"
f"Diff: {pretty_pydantic_diff(self, reconciled)}"
)
return reconciled
return self

# No events provided: strict tool name matching.
missing_in_runtime = persisted_names - runtime_names
missing_in_persisted = runtime_names - persisted_names

details: list[str] = []
if missing_in_runtime:
details.append(f"Missing in runtime: {sorted(missing_in_runtime)}")
if missing_in_persisted:
details.append(f"Missing in persisted: {sorted(missing_in_persisted)}")

suffix = f" ({'; '.join(details)})" if details else ""
raise ValueError(
"Tools don't match between runtime and persisted agents." + suffix
)

def model_dump_succint(self, **kwargs):
"""Like model_dump, but excludes None fields by default."""
Expand Down
61 changes: 43 additions & 18 deletions openhands-sdk/openhands/sdk/conversation/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ class ConversationState(OpenHandsModel):
)
workspace: BaseWorkspace = Field(
...,
description="Working directory for agent operations and tool execution",
description=(
"Workspace used by the agent to execute commands and read/write files. "
"Not the process working directory."
),
)
persistence_dir: str | None = Field(
default="workspace/conversations",
Expand Down Expand Up @@ -172,10 +175,35 @@ def create(
max_iterations: int = 500,
stuck_detection: bool = True,
) -> "ConversationState":
"""
If base_state.json exists: resume (attach EventLog,
reconcile agent, enforce id).
Else: create fresh (agent required), persist base, and return.
"""Create a new conversation state or resume from persistence.

This factory method handles both new conversation creation and resumption
from persisted state.

**New conversation:**
The provided Agent is used directly. Pydantic validation happens via the
cls() constructor.

**Restored conversation:**
The provided Agent is checked against the persisted agent using
agent.verify(). The agent class must match and the tools used in the
conversation history must still be available, but all other configuration
can be freely changed: LLM, agent_context, condenser, system prompts, etc.

Args:
id: Unique conversation identifier
agent: The Agent to use (tools must match persisted on restore)
workspace: Workspace the agent uses to execute commands and read/write files
persistence_dir: Directory for persisting state and events
max_iterations: Maximum iterations per run
stuck_detection: Whether to enable stuck detection

Returns:
ConversationState ready for use

Raises:
ValueError: If the conversation ID, agent class, or tools mismatch on restore
ValidationError: If agent or other fields fail Pydantic validation
"""
file_store = (
LocalFileStore(persistence_dir, cache_limit_size=max_iterations)
Expand All @@ -192,29 +220,28 @@ def create(
if base_text:
state = cls.model_validate(json.loads(base_text))

# Enforce conversation id match
# Restore the conversation with the same id
if state.id != id:
raise ValueError(
f"Conversation ID mismatch: provided {id}, "
f"but persisted state has {state.id}"
)

# Attach event log early so we can read history
# Attach event log early so we can read history for tool verification
state._fs = file_store
state._events = EventLog(file_store, dir_path=EVENTS_DIR)

# Reconcile agent config with deserialized one
# Pass event log so tool usage can be checked on-the-fly if needed
resolved = agent.resolve_diff_from_deserialized(
state.agent, events=state._events
)
# Verify compatibility (agent class + tools)
agent.verify(state.agent, events=state._events)

# Commit reconciled agent (may autosave)
# Commit runtime-provided values (may autosave)
state._autosave_enabled = True
state.agent = resolved
state.agent = agent
state.workspace = workspace
state.max_iterations = max_iterations

# Note: stats are already deserialized from base_state.json above
# Do NOT reset stats here - this would lose accumulated metrics
# Note: stats are already deserialized from base_state.json above.
# Do NOT reset stats here - this would lose accumulated metrics.

logger.info(
f"Resumed conversation {state.id} from persistent storage.\n"
Expand All @@ -237,8 +264,6 @@ def create(
max_iterations=max_iterations,
stuck_detection=stuck_detection,
)
# Record existing analyzer configuration in state
state.security_analyzer = state.security_analyzer
state._fs = file_store
state._events = EventLog(file_store, dir_path=EVENTS_DIR)
state.stats = ConversationStats()
Expand Down
51 changes: 0 additions & 51 deletions openhands-sdk/openhands/sdk/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@
if TYPE_CHECKING: # type hints only, avoid runtime import cycle
from openhands.sdk.tool.tool import ToolDefinition

from openhands.sdk.utils.pydantic_diff import pretty_pydantic_diff


with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand Down Expand Up @@ -322,19 +320,6 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
exclude=True,
)
_metrics: Metrics | None = PrivateAttr(default=None)
# ===== Plain class vars (NOT Fields) =====
# When serializing, these fields (SecretStr) will be dumped as "****"
# When deserializing, these fields will be ignored and we will override
# them from the LLM instance provided at runtime.
OVERRIDE_ON_SERIALIZE: tuple[str, ...] = (
"api_key",
"aws_access_key_id",
"aws_secret_access_key",
# Dynamic runtime metadata for telemetry/routing that can differ across sessions
# and should not cause resume-time diffs. Always prefer the runtime value.
"litellm_extra_body",
)

# Runtime-only private attrs
_model_info: Any = PrivateAttr(default=None)
_tokenizer: Any = PrivateAttr(default=None)
Expand Down Expand Up @@ -1101,39 +1086,3 @@ def _cast_value(raw: str, t: Any) -> Any:
if v is not None:
data[field_name] = v
return cls(**data)

def resolve_diff_from_deserialized(self, persisted: LLM) -> LLM:
    """Reconcile a deserialized LLM with this runtime instance.

    Secret fields such as ``api_key`` are written as "****" when an LLM is
    dumped, so a persisted LLM cannot be used as-is. The fields named in
    ``OVERRIDE_ON_SERIALIZE`` are copied from ``self`` onto ``persisted``;
    every other field must already agree between the two.

    Args:
        persisted: The LLM loaded from persisted state.

    Returns:
        ``persisted`` itself when no field is overridden, otherwise a copy of
        it carrying the runtime values of the whitelisted fields.

    Raises:
        ValueError: If the two instances are of different classes, or if they
            still differ after the whitelisted fields were overridden.
    """
    # Exact class identity is required; a subclass does not qualify.
    if self.__class__ is not persisted.__class__:
        raise ValueError(
            f"Cannot resolve_diff_from_deserialized between {self.__class__} "
            f"and {persisted.__class__}"
        )

    # Only whitelisted fields that appear in the persisted dump are
    # overridden with the runtime value.
    persisted_fields = persisted.model_dump(context={"expose_secrets": True})
    overrides = {
        name: getattr(self, name)
        for name in self.OVERRIDE_ON_SERIALIZE
        if name in persisted_fields
    }
    reconciled = persisted.model_copy(update=overrides) if overrides else persisted

    # After the overrides, both dumps (secrets exposed) must be identical.
    runtime_dump = self.model_dump(context={"expose_secrets": True})
    candidate_dump = reconciled.model_dump(context={"expose_secrets": True})
    if runtime_dump == candidate_dump:
        return reconciled

    raise ValueError(
        "The LLM provided is different from the one in persisted state.\n"
        f"Diff: {pretty_pydantic_diff(self, reconciled)}"
    )
Loading
Loading