diff --git a/openhands-sdk/openhands/sdk/agent/base.py b/openhands-sdk/openhands/sdk/agent/base.py
index 1bf2bbb166..5d65cc090c 100644
--- a/openhands-sdk/openhands/sdk/agent/base.py
+++ b/openhands-sdk/openhands/sdk/agent/base.py
@@ -9,7 +9,7 @@
 from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
 
 from openhands.sdk.context.agent_context import AgentContext
-from openhands.sdk.context.condenser import CondenserBase, LLMSummarizingCondenser
+from openhands.sdk.context.condenser import CondenserBase
 from openhands.sdk.context.prompts.prompt import render_template
 from openhands.sdk.llm import LLM
 from openhands.sdk.llm.utils.model_prompt_spec import get_model_prompt_spec
@@ -17,7 +17,6 @@
 from openhands.sdk.mcp import create_mcp_tools
 from openhands.sdk.tool import BUILT_IN_TOOLS, Tool, ToolDefinition, resolve_tool
 from openhands.sdk.utils.models import DiscriminatedUnionMixin
-from openhands.sdk.utils.pydantic_diff import pretty_pydantic_diff
 
 
 if TYPE_CHECKING:
@@ -300,64 +299,52 @@ def step(
         NOTE: state will be mutated in-place.
         """
 
-    def resolve_diff_from_deserialized(
+    def verify(
         self,
         persisted: "AgentBase",
         events: "Sequence[Any] | None" = None,
     ) -> "AgentBase":
-        """
-        Return a new AgentBase instance equivalent to `persisted` but with
-        explicitly whitelisted fields (e.g. api_key) taken from `self`.
+        """Verify that we can resume this agent from persisted state.
+
+        This PR's goal is to *not* reconcile configuration between persisted and
+        runtime Agent instances. Instead, we verify compatibility requirements
+        and then continue with the runtime-provided Agent.
+
+        Compatibility requirements:
+        - Agent class/type must match.
+        - Tools:
+          - If events are provided, only tools that were actually used in history
+            must exist in runtime.
+          - If events are not provided, tool names must match exactly.
+
+        All other configuration (LLM, agent_context, condenser, system prompts,
+        etc.) can be freely changed between sessions.
 
         Args:
-            persisted: The persisted agent from the conversation state.
-            events: Optional event sequence to scan for used tools if tool
-                names don't match. Only scanned when needed (O(n) fallback).
+            persisted: The agent loaded from persisted state.
+            events: Optional event sequence to scan for used tools if tool names
+                don't match.
+
+        Returns:
+            This runtime agent (self) if verification passes.
+
+        Raises:
+            ValueError: If agent class or tools don't match.
         """
         if persisted.__class__ is not self.__class__:
             raise ValueError(
-                f"Cannot resolve from deserialized: persisted agent is of type "
+                "Cannot load from persisted: persisted agent is of type "
                 f"{persisted.__class__.__name__}, but self is of type "
                 f"{self.__class__.__name__}."
             )
 
-        # Get all LLMs from both self and persisted to reconcile them
-        new_llm = self.llm.resolve_diff_from_deserialized(persisted.llm)
-        updates: dict[str, Any] = {"llm": new_llm}
-
-        # Reconcile the condenser's LLM if it exists
-        if self.condenser is not None and persisted.condenser is not None:
-            # Check if both condensers are LLMSummarizingCondenser
-            # (which has an llm field)
-
-            if isinstance(self.condenser, LLMSummarizingCondenser) and isinstance(
-                persisted.condenser, LLMSummarizingCondenser
-            ):
-                new_condenser_llm = self.condenser.llm.resolve_diff_from_deserialized(
-                    persisted.condenser.llm
-                )
-                new_condenser = persisted.condenser.model_copy(
-                    update={"llm": new_condenser_llm}
-                )
-                updates["condenser"] = new_condenser
-
-        # Reconcile agent_context - always use the current environment's agent_context
-        # This allows resuming conversations from different directories and handles
-        # cases where skills, working directory, or other context has changed
-        if self.agent_context is not None:
-            updates["agent_context"] = self.agent_context
-
-        # Get tool names for comparison
         runtime_names = {tool.name for tool in self.tools}
         persisted_names = {tool.name for tool in persisted.tools}
 
-        # If tool names match exactly, no need to check event history
         if runtime_names == persisted_names:
-            # Tools unchanged, proceed normally
-            pass
-        elif events is not None:
-            # Tool names differ - scan events to find which tools were actually used
-            # This is O(n) but only happens when tools change
+            return self
+
+        if events is not None:
             from openhands.sdk.event import ActionEvent
 
             used_tools = {
@@ -366,43 +353,31 @@ def resolve_diff_from_deserialized(
                 if isinstance(event, ActionEvent) and event.tool_name
             }
 
-            # Only require tools that were actually used in history
+            # Only require tools that were actually used in history.
             missing_used_tools = used_tools - runtime_names
             if missing_used_tools:
                 raise ValueError(
-                    f"Cannot resume conversation: tools that were used in history "
+                    "Cannot resume conversation: tools that were used in history "
                     f"are missing from runtime: {sorted(missing_used_tools)}. "
                     f"Available tools: {sorted(runtime_names)}"
                 )
-            # Update tools to match runtime (allows new tools to be added)
-            updates["tools"] = self.tools
-        else:
-            # No events provided - strict matching (legacy behavior)
-            missing_in_runtime = persisted_names - runtime_names
-            missing_in_persisted = runtime_names - persisted_names
-            error_msg = "Tools don't match between runtime and persisted agents."
-            if missing_in_runtime:
-                error_msg += f" Missing in runtime: {sorted(missing_in_runtime)}."
-            if missing_in_persisted:
-                error_msg += f" Missing in persisted: {sorted(missing_in_persisted)}."
-            raise ValueError(error_msg)
-
-        reconciled = persisted.model_copy(update=updates)
-
-        # Validate agent equality - exclude tools from comparison since we
-        # already validated tool requirements above
-        exclude_fields = {"tools"} if events is not None else set()
-        self_dump = self.model_dump(exclude_none=True, exclude=exclude_fields)
-        reconciled_dump = reconciled.model_dump(
-            exclude_none=True, exclude=exclude_fields
-        )
 
-        if self_dump != reconciled_dump:
-            raise ValueError(
-                "The Agent provided is different from the one in persisted state.\n"
-                f"Diff: {pretty_pydantic_diff(self, reconciled)}"
-            )
-        return reconciled
+            return self
+
+        # No events provided: strict tool name matching.
+        missing_in_runtime = persisted_names - runtime_names
+        missing_in_persisted = runtime_names - persisted_names
+
+        details: list[str] = []
+        if missing_in_runtime:
+            details.append(f"Missing in runtime: {sorted(missing_in_runtime)}")
+        if missing_in_persisted:
+            details.append(f"Missing in persisted: {sorted(missing_in_persisted)}")
+
+        suffix = f" ({'; '.join(details)})" if details else ""
+        raise ValueError(
+            "Tools don't match between runtime and persisted agents." + suffix
+        )
 
     def model_dump_succint(self, **kwargs):
         """Like model_dump, but excludes None fields by default."""
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index d4d179e15f..77dfca9e5f 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -60,7 +60,10 @@ class ConversationState(OpenHandsModel):
     )
     workspace: BaseWorkspace = Field(
         ...,
-        description="Working directory for agent operations and tool execution",
+        description=(
+            "Workspace used by the agent to execute commands and read/write files. "
+            "Not the process working directory."
+        ),
     )
     persistence_dir: str | None = Field(
         default="workspace/conversations",
@@ -172,10 +175,35 @@ def create(
         max_iterations: int = 500,
         stuck_detection: bool = True,
     ) -> "ConversationState":
-        """
-        If base_state.json exists: resume (attach EventLog,
-            reconcile agent, enforce id).
-        Else: create fresh (agent required), persist base, and return.
+        """Create a new conversation state or resume from persistence.
+
+        This factory method handles both new conversation creation and resumption
+        from persisted state.
+
+        **New conversation:**
+        The provided Agent is used directly. Pydantic validation happens via the
+        cls() constructor.
+
+        **Restored conversation:**
+        The provided Agent is validated against the persisted agent using
+        agent.load(). Tools must match (they may have been used in conversation
+        history), but all other configuration can be freely changed: LLM,
+        agent_context, condenser, system prompts, etc.
+
+        Args:
+            id: Unique conversation identifier
+            agent: The Agent to use (tools must match persisted on restore)
+            workspace: Working directory for agent operations
+            persistence_dir: Directory for persisting state and events
+            max_iterations: Maximum iterations per run
+            stuck_detection: Whether to enable stuck detection
+
+        Returns:
+            ConversationState ready for use
+
+        Raises:
+            ValueError: If conversation ID or tools mismatch on restore
+            ValidationError: If agent or other fields fail Pydantic validation
         """
         file_store = (
             LocalFileStore(persistence_dir, cache_limit_size=max_iterations)
@@ -192,29 +220,28 @@ def create(
         if base_text:
             state = cls.model_validate(json.loads(base_text))
 
-            # Enforce conversation id match
+            # Restore the conversation with the same id
             if state.id != id:
                 raise ValueError(
                     f"Conversation ID mismatch: provided {id}, "
                     f"but persisted state has {state.id}"
                 )
 
-            # Attach event log early so we can read history
+            # Attach event log early so we can read history for tool verification
             state._fs = file_store
             state._events = EventLog(file_store, dir_path=EVENTS_DIR)
 
-            # Reconcile agent config with deserialized one
-            # Pass event log so tool usage can be checked on-the-fly if needed
-            resolved = agent.resolve_diff_from_deserialized(
-                state.agent, events=state._events
-            )
+            # Verify compatibility (agent class + tools)
+            agent.verify(state.agent, events=state._events)
 
-            # Commit reconciled agent (may autosave)
+            # Commit runtime-provided values (may autosave)
             state._autosave_enabled = True
-            state.agent = resolved
+            state.agent = agent
+            state.workspace = workspace
+            state.max_iterations = max_iterations
 
-            # Note: stats are already deserialized from base_state.json above
-            # Do NOT reset stats here - this would lose accumulated metrics
+            # Note: stats are already deserialized from base_state.json above.
+            # Do NOT reset stats here - this would lose accumulated metrics.
 
             logger.info(
                 f"Resumed conversation {state.id} from persistent storage.\n"
@@ -237,8 +264,6 @@ def create(
             max_iterations=max_iterations,
             stuck_detection=stuck_detection,
         )
-        # Record existing analyzer configuration in state
-        state.security_analyzer = state.security_analyzer
         state._fs = file_store
         state._events = EventLog(file_store, dir_path=EVENTS_DIR)
         state.stats = ConversationStats()
diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
index e20bfca759..a88be306dd 100644
--- a/openhands-sdk/openhands/sdk/llm/llm.py
+++ b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -28,8 +28,6 @@
 if TYPE_CHECKING:  # type hints only, avoid runtime import cycle
     from openhands.sdk.tool.tool import ToolDefinition
 
-from openhands.sdk.utils.pydantic_diff import pretty_pydantic_diff
-
 
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
@@ -322,19 +320,6 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         exclude=True,
     )
     _metrics: Metrics | None = PrivateAttr(default=None)
-    # ===== Plain class vars (NOT Fields) =====
-    # When serializing, these fields (SecretStr) will be dump to "****"
-    # When deserializing, these fields will be ignored and we will override
-    # them from the LLM instance provided at runtime.
-    OVERRIDE_ON_SERIALIZE: tuple[str, ...] = (
-        "api_key",
-        "aws_access_key_id",
-        "aws_secret_access_key",
-        # Dynamic runtime metadata for telemetry/routing that can differ across sessions
-        # and should not cause resume-time diffs. Always prefer the runtime value.
-        "litellm_extra_body",
-    )
-
     # Runtime-only private attrs
     _model_info: Any = PrivateAttr(default=None)
     _tokenizer: Any = PrivateAttr(default=None)
@@ -1101,39 +1086,3 @@ def _cast_value(raw: str, t: Any) -> Any:
             if v is not None:
                 data[field_name] = v
         return cls(**data)
-
-    def resolve_diff_from_deserialized(self, persisted: LLM) -> LLM:
-        """Resolve differences between a deserialized LLM and the current instance.
-
-        This is due to fields like api_key being serialized to "****" in dumps,
-        and we want to ensure that when loading from a file, we still use the
-        runtime-provided api_key in the self instance.
-
-        Return a new LLM instance equivalent to `persisted` but with
-        explicitly whitelisted fields (e.g. api_key) taken from `self`.
-        """
-        if persisted.__class__ is not self.__class__:
-            raise ValueError(
-                f"Cannot resolve_diff_from_deserialized between {self.__class__} "
-                f"and {persisted.__class__}"
-            )
-
-        # Copy allowed fields from runtime llm into the persisted llm
-        llm_updates = {}
-        persisted_dump = persisted.model_dump(context={"expose_secrets": True})
-        for field in self.OVERRIDE_ON_SERIALIZE:
-            if field in persisted_dump.keys():
-                llm_updates[field] = getattr(self, field)
-        if llm_updates:
-            reconciled = persisted.model_copy(update=llm_updates)
-        else:
-            reconciled = persisted
-
-        dump = self.model_dump(context={"expose_secrets": True})
-        reconciled_dump = reconciled.model_dump(context={"expose_secrets": True})
-        if dump != reconciled_dump:
-            raise ValueError(
-                "The LLM provided is different from the one in persisted state.\n"
-                f"Diff: {pretty_pydantic_diff(self, reconciled)}"
-            )
-        return reconciled
diff --git a/tests/cross/test_agent_reconciliation.py b/tests/cross/test_agent_loading.py
similarity index 76%
rename from tests/cross/test_agent_reconciliation.py
rename to tests/cross/test_agent_loading.py
index 1696fffdb6..5bce5ee614 100644
--- a/tests/cross/test_agent_reconciliation.py
+++ b/tests/cross/test_agent_loading.py
@@ -1,19 +1,20 @@
-"""Test agent reconciliation logic in agent deserialization and conversation restart."""
+"""Test agent loading (conversation restart) behavior."""
 
 import tempfile
 import uuid
 from unittest.mock import patch
 
+import pytest
 from pydantic import SecretStr
 
 from openhands.sdk import Agent
-from openhands.sdk.agent import AgentBase
 from openhands.sdk.context import AgentContext, Skill
 from openhands.sdk.context.condenser.llm_summarizing_condenser import (
     LLMSummarizingCondenser,
 )
 from openhands.sdk.conversation import Conversation
 from openhands.sdk.conversation.impl.local_conversation import LocalConversation
+from openhands.sdk.conversation.state import ConversationExecutionStatus
 from openhands.sdk.llm import LLM, Message, TextContent
 from openhands.sdk.tool import Tool, register_tool
 from openhands.tools.file_editor import FileEditorTool
@@ -25,6 +26,10 @@
 register_tool("FileEditorTool", FileEditorTool)
 
 
+class ModuleScopeOtherAgent(Agent):
+    pass
+
+
 # Tests from test_llm_reconciliation.py
 def test_conversation_restart_with_nested_llms(tmp_path):
     """Test conversation restart with agent containing nested LLMs."""
@@ -106,7 +111,7 @@ def test_conversation_restarted_with_changed_working_directory(tmp_path_factory)
     )
 
 
-# Tests from test_local_conversation_tools_integration.py
+# Tests for agent tools restriction and LLM flexibility
 def test_conversation_allows_removing_unused_tools():
     """Test that removing tools that weren't used in history is allowed.
 
@@ -263,8 +268,6 @@ def test_conversation_fails_when_used_tool_is_missing():
         reduced_agent = Agent(llm=llm2, tools=reduced_tools)
 
         # This should raise - TerminalTool was used in history but is now missing
-        import pytest
-
         with pytest.raises(ValueError, match="tools that were used in history"):
             LocalConversation(
                 agent=reduced_agent,
@@ -328,57 +331,86 @@ def test_conversation_with_same_agent_succeeds():
         assert len(new_conversation.state.events) > 0
 
 
-def test_agent_resolve_diff_from_deserialized():
-    """Test agent's resolve_diff_from_deserialized method.
-
-    Includes tolerance for litellm_extra_body differences injected at CLI load time.
-    """
-    with tempfile.TemporaryDirectory():
-        # Create original agent
+def test_conversation_with_different_llm_succeeds():
+    """Test that using an agent with different LLM succeeds (LLM can change)."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create and save conversation with original agent
         tools = [Tool(name="TerminalTool")]
-        llm = LLM(
+        llm1 = LLM(
             model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
         )
-        original_agent = Agent(llm=llm, tools=tools)
+        original_agent = Agent(llm=llm1, tools=tools)
+        conversation = LocalConversation(
+            agent=original_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            visualizer=None,
+        )
 
-        # Serialize and deserialize to simulate persistence
-        serialized = original_agent.model_dump_json()
-        deserialized_agent = AgentBase.model_validate_json(serialized)
+        # Send a message to create some state
+        conversation.send_message(
+            Message(role="user", content=[TextContent(text="test message")])
+        )
 
-        # Create runtime agent with same configuration
+        conversation_id = conversation.state.id
+        del conversation
+
+        # Create new conversation with different LLM - this should succeed
         llm2 = LLM(
-            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+            model="gpt-4o",  # Different model
+            api_key=SecretStr("different-key"),  # Different key
+            usage_id="different-llm",
         )
-        runtime_agent = Agent(llm=llm2, tools=tools)
-
-        # Should resolve successfully
-        resolved = runtime_agent.resolve_diff_from_deserialized(deserialized_agent)
-        # Test model_dump equality
-        assert resolved.model_dump(mode="json") == runtime_agent.model_dump(mode="json")
-        assert resolved.llm.model == runtime_agent.llm.model
-        assert resolved.__class__ == runtime_agent.__class__
-
-        # Now simulate CLI injecting dynamic litellm_extra_body metadata at load time
-        injected = deserialized_agent.model_copy(
-            update={
-                "llm": deserialized_agent.llm.model_copy(
-                    update={
-                        "litellm_extra_body": {
-                            "metadata": {
-                                "session_id": "sess-123",
-                                "tags": ["app:openhands", "model:gpt-4o-mini"],
-                                "trace_version": "1.2.3",
-                            }
-                        }
-                    }
-                )
-            }
-        )
-
-        # Reconcile again: differences in litellm_extra_body should be allowed and
-        # the runtime value should be preferred without raising an error.
-        resolved2 = runtime_agent.resolve_diff_from_deserialized(injected)
-        assert resolved2.llm.litellm_extra_body == runtime_agent.llm.litellm_extra_body
+        different_agent = Agent(llm=llm2, tools=tools)
+
+        # This should succeed - LLM can be freely changed between sessions
+        new_conversation = LocalConversation(
+            agent=different_agent,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            conversation_id=conversation_id,
+            visualizer=None,
+        )
+
+        # Verify state was loaded and new agent with new LLM is used
+        assert len(new_conversation.state.events) > 0
+        assert new_conversation.agent.llm.model == "gpt-4o"
+        assert new_conversation.agent.llm.usage_id == "different-llm"
+
+
+def test_conversation_fails_when_agent_type_changes():
+    """Test that resuming with a different Agent class fails.
+
+    This is a hard compatibility requirement: we can only resume if the runtime
+    agent is the same class as the persisted agent.
+
+    Note: we define the alternative Agent at module scope to ensure the persisted
+    snapshot can be deserialized; otherwise, Pydantic rejects local classes.
+    """
+
+    tools = [Tool(name="TerminalTool")]
+
+    llm1 = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="llm")
+    llm2 = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="llm")
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        conversation = LocalConversation(
+            agent=Agent(llm=llm1, tools=tools),
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            visualizer=None,
+        )
+        conversation_id = conversation.state.id
+        del conversation
+
+        with pytest.raises(ValueError, match=r"persisted agent is of type"):
+            LocalConversation(
+                agent=ModuleScopeOtherAgent(llm=llm2, tools=tools),
+                workspace=temp_dir,
+                persistence_dir=temp_dir,
+                conversation_id=conversation_id,
+                visualizer=None,
+            )
 
 
 @patch("openhands.sdk.llm.llm.litellm_completion")
@@ -458,6 +490,74 @@ def test_conversation_persistence_lifecycle(mock_completion):
         assert len(new_conversation.state.events) >= original_event_count + 2
 
 
+def test_conversation_resume_overrides_agent_llm_but_preserves_state_settings():
+    """Test resume behavior when changing runtime Agent/LLM settings.
+
+    Expectations:
+    - Some conversation *state* settings are persisted and should not be overridden
+      on resume (e.g., confirmation_policy, execution_status).
+    - Agent/LLM settings should come from the runtime-provided Agent on resume
+
+    This test covers the common workflow: start a persisted conversation, tweak a
+    couple of state settings, then resume with a different LLM configuration.
+    """
+
+    from openhands.sdk.security.confirmation_policy import AlwaysConfirm
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        tools = [Tool(name="TerminalTool")]
+
+        # Initial agent (persisted snapshot contains this agent config, but on resume
+        # we should use the runtime-provided agent).
+        llm1 = LLM(
+            model="gpt-5.1-codex-max",
+            api_key=SecretStr("test-key-1"),
+            usage_id="llm-1",
+            max_input_tokens=100_000,
+        )
+        agent1 = Agent(llm=llm1, tools=tools)
+
+        conversation = LocalConversation(
+            agent=agent1,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            visualizer=None,
+        )
+
+        # Persisted state settings (these should be restored from persistence).
+        conversation.state.confirmation_policy = AlwaysConfirm()
+        conversation.state.execution_status = ConversationExecutionStatus.STUCK
+
+        conversation_id = conversation.state.id
+        del conversation
+
+        # Resume with a different runtime Agent + LLM settings.
+        llm2 = LLM(
+            model="gpt-5.2",
+            api_key=SecretStr("test-key-2"),
+            usage_id="llm-2",
+            max_input_tokens=50_000,
+        )
+        agent2 = Agent(llm=llm2, tools=tools)
+
+        resumed = LocalConversation(
+            agent=agent2,
+            workspace=temp_dir,
+            persistence_dir=temp_dir,
+            conversation_id=conversation_id,
+            visualizer=None,
+        )
+
+        # Persisted settings should remain.
+        assert resumed.state.execution_status == ConversationExecutionStatus.STUCK
+        assert resumed.state.confirmation_policy.should_confirm()
+
+        # Runtime agent/LLM settings should override persisted agent snapshot.
+        assert resumed.agent.llm.model == "gpt-5.2"
+        assert resumed.agent.llm.max_input_tokens == 50_000
+        assert resumed.agent.llm.usage_id == "llm-2"
+
+
 def test_conversation_restart_with_different_agent_context():
     """
     Test conversation restart when agent_context differs.
diff --git a/tests/sdk/conversation/local/test_state_serialization.py b/tests/sdk/conversation/local/test_state_serialization.py
index 3fee0ba85e..eaee970b08 100644
--- a/tests/sdk/conversation/local/test_state_serialization.py
+++ b/tests/sdk/conversation/local/test_state_serialization.py
@@ -9,16 +9,11 @@
 from pydantic import SecretStr, ValidationError
 
 from openhands.sdk import Agent, Conversation
-from openhands.sdk.agent.base import AgentBase
 from openhands.sdk.conversation.impl.local_conversation import LocalConversation
 from openhands.sdk.conversation.state import (
     ConversationExecutionStatus,
     ConversationState,
 )
-from openhands.sdk.conversation.types import (
-    ConversationCallbackType,
-    ConversationTokenCallbackType,
-)
 from openhands.sdk.event.llm_convertible import MessageEvent, SystemPromptEvent
 from openhands.sdk.llm import LLM, Message, TextContent
 from openhands.sdk.llm.llm_registry import RegistryEvent
@@ -427,8 +422,79 @@ def test_conversation_state_thread_safety():
     assert not state.owned()
 
 
-def test_agent_resolve_diff_different_class_raises_error():
-    """Test that resolve_diff_from_deserialized raises error for different agent classes."""  # noqa: E501
+def test_agent_pydantic_validation_on_creation():
+    """Test that Pydantic validation happens when creating agents."""
+    # Valid agent creation - Pydantic validates
+    llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")
+    agent = Agent(llm=llm, tools=[])
+    assert agent.llm.model == "gpt-4o-mini"
+
+    # Invalid LLM creation should fail Pydantic validation
+    with pytest.raises(ValueError, match="model must be specified"):
+        LLM(model="", api_key=SecretStr("test-key"), usage_id="test-llm")
+
+
+def test_agent_verify_validates_tools_match():
+    """Test that agent.verify() validates tools match between runtime and persisted."""
+    from openhands.sdk.agent import AgentBase
+    from openhands.sdk.tool import Tool
+
+    llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")
+
+    # Create original agent with two tools
+    original_agent = Agent(
+        llm=llm, tools=[Tool(name="TerminalTool"), Tool(name="FileEditorTool")]
+    )
+
+    # Serialize and deserialize to simulate persistence
+    serialized = original_agent.model_dump_json()
+    persisted_agent = AgentBase.model_validate_json(serialized)
+
+    # Runtime agent with same tools should succeed
+    same_tools_agent = Agent(
+        llm=llm, tools=[Tool(name="TerminalTool"), Tool(name="FileEditorTool")]
+    )
+    result = same_tools_agent.verify(persisted_agent)
+    assert result is same_tools_agent
+
+    # Runtime agent with different tools should fail
+    different_tools_agent = Agent(llm=llm, tools=[Tool(name="TerminalTool")])
+    with pytest.raises(
+        ValueError, match="Tools don't match between runtime and persisted agents"
+    ):
+        different_tools_agent.verify(persisted_agent)
+
+
+def test_agent_verify_allows_different_llm():
+    """Test that agent.verify() allows different LLM configuration."""
+    from openhands.sdk.agent import AgentBase
+    from openhands.sdk.tool import Tool
+
+    tools = [Tool(name="TerminalTool")]
+
+    # Create original agent
+    llm1 = LLM(model="gpt-4o-mini", api_key=SecretStr("key1"), usage_id="llm1")
+    original_agent = Agent(llm=llm1, tools=tools)
+
+    # Serialize and deserialize
+    serialized = original_agent.model_dump_json()
+    persisted_agent = AgentBase.model_validate_json(serialized)
+
+    # Runtime agent with different LLM should succeed (LLM can change freely)
+    llm2 = LLM(model="gpt-4o", api_key=SecretStr("key2"), usage_id="llm2")
+    different_llm_agent = Agent(llm=llm2, tools=tools)
+    result = different_llm_agent.verify(persisted_agent)
+    assert result is different_llm_agent
+    assert result.llm.model == "gpt-4o"
+
+
+def test_agent_verify_different_class_raises_error():
+    """Test that agent.verify() raises error for different agent classes."""
+    from openhands.sdk.agent.base import AgentBase
+    from openhands.sdk.conversation.types import (
+        ConversationCallbackType,
+        ConversationTokenCallbackType,
+    )
 
     class DifferentAgent(AgentBase):
         def __init__(self):
@@ -454,8 +520,8 @@ def step(
     original_agent = Agent(llm=llm, tools=[])
     different_agent = DifferentAgent()
 
-    with pytest.raises(ValueError, match="Cannot resolve from deserialized"):
-        original_agent.resolve_diff_from_deserialized(different_agent)
+    with pytest.raises(ValueError, match="Cannot load from persisted"):
+        original_agent.verify(different_agent)
 
 
 def test_conversation_state_flags_persistence():
@@ -556,12 +622,102 @@ def test_conversation_with_agent_different_llm_config():
         assert new_dump == original_state_dump
 
 
-def test_conversation_state_stats_preserved_on_resume():
-    """Test that conversation stats are preserved when resuming a conversation.
+def test_resume_uses_runtime_workspace_and_max_iterations():
+    """Test that resume uses runtime-provided workspace and max_iterations."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        agent = Agent(llm=llm, tools=[])
+        conv_id = uuid.UUID("12345678-1234-5678-9abc-123456789007")
+        persist_path = LocalConversation.get_persistence_dir(temp_dir, conv_id)
+
+        original_workspace = LocalWorkspace(working_dir="/original/path")
+        state = ConversationState.create(
+            workspace=original_workspace,
+            persistence_dir=persist_path,
+            agent=agent,
+            id=conv_id,
+            max_iterations=100,
+        )
+        assert state.max_iterations == 100
+
+        new_workspace = LocalWorkspace(working_dir="/new/path")
+        resumed_state = ConversationState.create(
+            workspace=new_workspace,
+            persistence_dir=persist_path,
+            agent=agent,
+            id=conv_id,
+            max_iterations=200,
+        )
+
+        assert resumed_state.workspace.working_dir == "/new/path"
+        assert resumed_state.max_iterations == 200
+
+
+def test_resume_preserves_persisted_execution_status_and_stuck_detection():
+    """Test that resume preserves execution_status and stuck_detection."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        agent = Agent(llm=llm, tools=[])
+        conv_id = uuid.UUID("12345678-1234-5678-9abc-123456789008")
+        persist_path = LocalConversation.get_persistence_dir(temp_dir, conv_id)
+
+        state = ConversationState.create(
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            persistence_dir=persist_path,
+            agent=agent,
+            id=conv_id,
+            stuck_detection=False,
+        )
+        state.execution_status = ConversationExecutionStatus.PAUSED
 
-    This is a regression test for the issue where stats (including context_window)
-    were being reset to zero when resuming a conversation.
-    """
+        resumed_state = ConversationState.create(
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            persistence_dir=persist_path,
+            agent=agent,
+            id=conv_id,
+            stuck_detection=True,
+        )
+
+        assert resumed_state.execution_status == ConversationExecutionStatus.PAUSED
+        assert resumed_state.stuck_detection is False
+
+
+def test_resume_preserves_blocked_actions_and_messages():
+    """Test that resume preserves blocked_actions and blocked_messages."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        agent = Agent(llm=llm, tools=[])
+        conv_id = uuid.UUID("12345678-1234-5678-9abc-123456789009")
+        persist_path = LocalConversation.get_persistence_dir(temp_dir, conv_id)
+
+        state = ConversationState.create(
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            persistence_dir=persist_path,
+            agent=agent,
+            id=conv_id,
+        )
+        state.block_action("action-1", "dangerous action")
+        state.block_message("msg-1", "inappropriate content")
+
+        resumed_state = ConversationState.create(
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            persistence_dir=persist_path,
+            agent=agent,
+            id=conv_id,
+        )
+
+        assert resumed_state.blocked_actions["action-1"] == "dangerous action"
+        assert resumed_state.blocked_messages["msg-1"] == "inappropriate content"
+
+
+def test_conversation_state_stats_preserved_on_resume():
+    """Regression: stats should not be reset when resuming a conversation."""
     with tempfile.TemporaryDirectory() as temp_dir:
         llm = LLM(
             model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
@@ -579,7 +735,6 @@ def test_conversation_state_stats_preserved_on_resume():
             id=conv_id,
         )
 
-        # Register the LLM and add some metrics
         state.stats.register_llm(RegistryEvent(llm=llm))
 
         # Add token usage with context_window
@@ -617,7 +772,6 @@ def test_conversation_state_stats_preserved_on_resume():
         )
         new_agent = Agent(llm=new_llm, tools=[])
 
-        # Resume the conversation
         resumed_state = ConversationState.create(
             workspace=LocalWorkspace(working_dir="/tmp"),
             persistence_dir=persist_path_for_state,
@@ -637,3 +791,30 @@ def test_conversation_state_stats_preserved_on_resume():
         assert (
             resumed_combined_metrics.accumulated_token_usage.context_window == 128000
         ), "Context window should be preserved after resume"
+
+
+def test_resume_with_conversation_id_mismatch_raises_error():
+    """Test that resuming with mismatched conversation ID raises error."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        agent = Agent(llm=llm, tools=[])
+        original_id = uuid.UUID("12345678-1234-5678-9abc-12345678900b")
+        different_id = uuid.UUID("12345678-1234-5678-9abc-12345678900c")
+        persist_path = LocalConversation.get_persistence_dir(temp_dir, original_id)
+
+        ConversationState.create(
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            persistence_dir=persist_path,
+            agent=agent,
+            id=original_id,
+        )
+
+        with pytest.raises(ValueError, match="Conversation ID mismatch"):
+            ConversationState.create(
+                workspace=LocalWorkspace(working_dir="/tmp"),
+                persistence_dir=persist_path,
+                agent=agent,
+                id=different_id,
+            )
diff --git a/tests/sdk/llm/test_llm_reconciliation.py b/tests/sdk/llm/test_llm_reconciliation.py
deleted file mode 100644
index 2cd86d75e2..0000000000
--- a/tests/sdk/llm/test_llm_reconciliation.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from pydantic import SecretStr
-
-from openhands.sdk import Agent
-from openhands.sdk.agent import AgentBase
-from openhands.sdk.llm import LLM
-
-
-def test_resolve_diff_ignores_litellm_extra_body_diffs():
-    tools = []
-    llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm")
-    original_agent = Agent(llm=llm, tools=tools)
-
-    serialized = original_agent.model_dump_json()
-    deserialized_agent = AgentBase.model_validate_json(serialized)
-
-    runtime_agent = Agent(
-        llm=LLM(
-            model="gpt-4o-mini",
-            api_key=SecretStr("test-key"),
-            usage_id="test-llm",
-        ),
-        tools=tools,
-    )
-
-    injected = deserialized_agent.model_copy(
-        update={
-            "llm": deserialized_agent.llm.model_copy(
-                update={
-                    "litellm_extra_body": {
-                        "metadata": {
-                            "session_id": "sess-xyz",
-                            "tags": ["app:openhands", "model:gpt-4o-mini"],
-                            "trace_version": "9.9.9",
-                        }
-                    }
-                }
-            )
-        }
-    )
-
-    resolved = runtime_agent.resolve_diff_from_deserialized(injected)
-    assert resolved.llm.litellm_extra_body == runtime_agent.llm.litellm_extra_body
diff --git a/tests/sdk/llm/test_llm_serialization.py b/tests/sdk/llm/test_llm_serialization.py
index b49771bf33..eeceef9723 100644
--- a/tests/sdk/llm/test_llm_serialization.py
+++ b/tests/sdk/llm/test_llm_serialization.py
@@ -62,6 +62,19 @@ def test_llm_secret_fields_serialization() -> None:
     assert deserialized_llm.aws_secret_access_key is None
 
 
+def test_llm_model_dump_json_masks_secrets() -> None:
+    """Test that JSON serialization masks secrets by default."""
+    llm = LLM(
+        usage_id="test-llm",
+        model="test-model",
+        api_key=SecretStr("secret-api-key"),
+    )
+
+    dumped = llm.model_dump_json()
+    assert "secret-api-key" not in dumped
+    assert "**********" in dumped
+
+
 def test_llm_excluded_fields_not_serialized() -> None:
     """Test that excluded fields are not included in serialization."""
     # Create LLM with excluded fields