webup · webup · Oct 24, 2025 · Oct 22, 2025 · Oct 23, 2025
diff --git a/apps/sample-agent/src/sample_agent/context.py b/apps/sample-agent/src/sample_agent/context.py
@@ -1,24 +1,41 @@
 """Context schema for supervisor configuration."""
 
-from typing import Any
+from __future__ import annotations
 
-from pydantic import BaseModel, Field
+from dataclasses import asdict, dataclass, field
 
+from langgraph_up_devkits.context import BaseAgentContext
 
-class SupervisorContext(BaseModel):
-    """Context schema for supervisor configuration."""
 
-    model_name: str = Field(default="siliconflow:zai-org/GLM-4.5-Air", description="Default model name")
-    temperature: float = 0.7
-    max_tokens: int | None = None
-    debug_mode: bool = False
-    recursion_limit: int = Field(default=100, description="Recursion limit for agent execution")
+@dataclass(kw_only=True)
+class SupervisorContext(BaseAgentContext):
+    """Context schema for supervisor configuration.
+
+    Extends BaseAgentContext with supervisor-specific defaults.
+    Uses GLM-4.5-Air model by default for efficient coordination.
+
+    Inherits from BaseAgentContext:
+    - model: LLM identifier (overridden to siliconflow:zai-org/GLM-4.5-Air)
+    - temperature: Sampling temperature (default 0.7)
+    - max_tokens: Response token cap (default None)
+    - recursion_limit: LangGraph recursion depth (default 100)
+    - debug: Enable debug logging
+    - user_id: Optional user identifier
+    """
+
+    # Override model default for supervisor
+    model: str = field(
+        default="siliconflow:zai-org/GLM-4.5-Air",
+        metadata={
+            "description": "The name of the language model to use for the supervisor agent.",
+        },
+    )
 
     @classmethod
-    def default(cls) -> "SupervisorContext":
+    def default(cls) -> SupervisorContext:
         """Create default supervisor context."""
         return cls()
 
-    def to_dict(self) -> dict[str, Any]:
+    def to_dict(self) -> dict[str, dict[str, str | float | int | bool | None]]:
         """Convert to dictionary for RunnableConfig."""
-        return {"configurable": self.model_dump()}
+        return {"configurable": asdict(self)}
diff --git a/apps/sample-agent/src/sample_agent/graph.py b/apps/sample-agent/src/sample_agent/graph.py
@@ -24,11 +24,14 @@ def make_graph(config: RunnableConfig | None = None) -> CompiledStateGraph[Any,
 
     # Convert runnable config to context
     configurable = config.get("configurable", {})
-    context_kwargs = {k: v for k, v in configurable.items() if k in SupervisorContext.model_fields}
+    from dataclasses import fields
+
+    context_field_names = {f.name for f in fields(SupervisorContext)}
+    context_kwargs = {k: v for k, v in configurable.items() if k in context_field_names}
     context = SupervisorContext(**context_kwargs)
 
     # Load model based on configuration
-    model = load_chat_model(context.model_name)
+    model = load_chat_model(context.model)
 
     # Create agents with the configured model via make_graph functions
     math_agent = make_math_graph(config)

diff --git a/apps/sample-agent/src/sample_agent/state.py b/apps/sample-agent/src/sample_agent/state.py
@@ -1,22 +1,25 @@
-"""Simple state definition for Agent1 extending MessagesState."""
+"""State definition for sample-agent extending AgentState."""
 
-from typing import Annotated, TypedDict
+from typing import NotRequired
 
-from langchain_core.messages import BaseMessage
-from langgraph.graph.message import add_messages
+from langchain.agents import AgentState as BaseAgentState
 
 
-class AgentState(TypedDict):
-    """Simple state for sample-agent with task description support."""
+class AgentState(BaseAgentState):  # type: ignore[type-arg]
+    """State for sample-agent with additional fields.
 
-    # Core message history
-    messages: Annotated[list[BaseMessage], add_messages]
+    Extends langchain.agents.AgentState which provides:
+    - messages: Annotated[list[BaseMessage], add_messages]
+    - jump_to: NotRequired[Annotated[JumpTo | None, EphemeralValue, PrivateStateAttr]]
+    - structured_response: NotRequired[Annotated[ResponseT, OmitFromInput]]
+    """
 
-    # Required for create_react_agent
+    # Required by create_react_agent
     remaining_steps: int
 
-    # Task management - following the reference pattern
-    task_description: str | None
+    # Additional fields for supervisor pattern
+    task_description: NotRequired[str | None]
+    active_agent: NotRequired[str | None]
 
-    # Active agent tracking
-    active_agent: str | None
+
+__all__ = ["AgentState"]
diff --git a/apps/sample-agent/src/sample_agent/subagents/math.py b/apps/sample-agent/src/sample_agent/subagents/math.py
@@ -27,11 +27,14 @@ def make_graph(config: RunnableConfig | None = None) -> CompiledStateGraph[Any,
 
     # Convert runnable config to context
     configurable = config.get("configurable", {})
-    context_kwargs = {k: v for k, v in configurable.items() if k in SupervisorContext.model_fields}
+    from dataclasses import fields
+
+    context_field_names = {f.name for f in fields(SupervisorContext)}
+    context_kwargs = {k: v for k, v in configurable.items() if k in context_field_names}
     context = SupervisorContext(**context_kwargs)
 
     # Load model based on configuration
-    model = load_chat_model(context.model_name)
+    model = load_chat_model(context.model)
 
     # Create and return the math agent directly
     return create_agent(

diff --git a/apps/sample-agent/src/sample_agent/subagents/research.py b/apps/sample-agent/src/sample_agent/subagents/research.py
@@ -27,11 +27,14 @@ def make_graph(config: RunnableConfig | None = None) -> CompiledStateGraph[Any,
 
     # Convert runnable config to context
     configurable = config.get("configurable", {})
-    context_kwargs = {k: v for k, v in configurable.items() if k in SupervisorContext.model_fields}
+    from dataclasses import fields
+
+    context_field_names = {f.name for f in fields(SupervisorContext)}
+    context_kwargs = {k: v for k, v in configurable.items() if k in context_field_names}
     context = SupervisorContext(**context_kwargs)
 
     # Load model based on configuration
-    model = load_chat_model(context.model_name)
+    model = load_chat_model(context.model)
 
     # Create and return the research agent directly
     return create_agent(

diff --git a/apps/sample-agent/src/sample_agent/tools/handoff.py b/apps/sample-agent/src/sample_agent/tools/handoff.py
@@ -2,8 +2,8 @@
 
 from typing import Annotated, Any
 
-from langchain_core.messages import ToolMessage
-from langchain_core.tools import BaseTool, InjectedToolCallId, tool
+from langchain.messages import ToolMessage
+from langchain.tools import BaseTool, InjectedToolCallId, tool
 from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from langgraph_supervisor.handoff import METADATA_KEY_HANDOFF_DESTINATION

diff --git a/apps/sample-agent/tests/conftest.py b/apps/sample-agent/tests/conftest.py
@@ -7,8 +7,8 @@
 from unittest.mock import patch
 
 import pytest
-from langchain_core.language_models.chat_models import BaseChatModel
-from langchain_core.messages import AIMessage
+from langchain.chat_models import BaseChatModel
+from langchain.messages import AIMessage
 from langchain_core.outputs import ChatGeneration, LLMResult
 
 

diff --git a/apps/sample-agent/tests/integration/test_handoff.py b/apps/sample-agent/tests/integration/test_handoff.py
@@ -1,7 +1,7 @@
 """Integration tests for handoff functionality with real models and workflows."""
 
 import pytest
-from langchain_core.messages import HumanMessage
+from langchain.messages import HumanMessage
 from sample_agent.graph import make_graph
 from sample_agent.state import AgentState
 from sample_agent.tools.handoff import create_custom_handoff_tool

diff --git a/apps/sample-agent/tests/unit/test_graph.py b/apps/sample-agent/tests/unit/test_graph.py
@@ -3,7 +3,7 @@
 from unittest.mock import Mock, patch
 
 import pytest
-from langchain_core.messages import AIMessage, HumanMessage
+from langchain.messages import AIMessage, HumanMessage
 from sample_agent.state import AgentState
 from sample_agent.tools.basic import add, multiply, web_search
 from sample_agent.tools.handoff import create_custom_handoff_tool
@@ -92,7 +92,7 @@ def test_math_agent_creation(self, mock_load_model, mock_create_agent):
         mock_compiled_graph = Mock()
         mock_create_agent.return_value = mock_compiled_graph
 
-        config = {"configurable": {"model_name": "test_model"}}
+        config = {"configurable": {"model": "test_model"}}
         result = make_graph(config)
 
         mock_load_model.assert_called_once_with("test_model")
@@ -115,7 +115,7 @@ def test_research_agent_creation(self, mock_load_model, mock_create_agent):
         mock_compiled_graph = Mock()
         mock_create_agent.return_value = mock_compiled_graph
 
-        config = {"configurable": {"model_name": "test_model"}}
+        config = {"configurable": {"model": "test_model"}}
         result = make_graph(config)
 
         mock_load_model.assert_called_once_with("test_model")

diff --git a/apps/sample-deep-agent/README.md b/apps/sample-deep-agent/README.md
@@ -174,16 +174,146 @@ Automatic storage of:
 
 ## Testing
 
-Run the test suite:
+### Unit Tests
+
+Run unit tests (no API keys required):
 
 ```bash
-# Unit tests
 make unit sample-deep-agent
+```
+
+### Integration Tests
+
+Integration tests require API keys and make real API calls:
+
+```bash
+# Set up environment variables first
+export SILICONFLOW_API_KEY=your_key_here
+export TAVILY_API_KEY=your_key_here
 
-# Integration tests (requires API keys)
+# Run all integration tests
 make integration sample-deep-agent
 
-# All tests
+# Run a specific HITL integration test
+cd apps/sample-deep-agent
+uv run pytest tests/integration/test_hitl.py::TestHITLWorkflow::test_comprehensive_hitl_workflow -v -s
+```
+
+### Human-in-the-Loop (HITL) Testing
+
+The agent includes comprehensive HITL integration tests that verify interrupt functionality with real LLM calls.
+
+#### HITL Configuration
+
+Configure interrupts by passing `interrupt_on` and `subagent_interrupts` to `make_graph()`:
+
+```python
+from sample_deep_agent.graph import make_graph
+
+# Define interrupt configuration
+interrupt_on = {
+    "task": {"allowed_decisions": ["approve", "reject"]},  # Only approve/reject
+    "write_todos": False,  # Don't interrupt write_todos
+    "think_tool": False,  # Don't interrupt think_tool
+    "deep_web_search": True,  # Interrupt at top level
+}
+
+subagent_interrupts = {
+    "research-agent": {
+        "deep_web_search": True,  # Interrupt in subagent too
+        "think_tool": False,  # Don't interrupt think_tool in subagent
+    }
+}
+
+# Create agent with HITL configuration
+agent = make_graph(
+    config={"configurable": {"max_todos": 1}},
+    interrupt_on=interrupt_on,
+    subagent_interrupts=subagent_interrupts
+)
+```
+
+#### Interrupt Decision Types
+
+Three types of decisions are supported:
+
+1. **Approve**: Execute tool with original arguments
+   ```python
+   {"type": "approve"}
+   ```
+
+2. **Reject**: Skip tool execution (the agent receives an error message instead)
+   ```python
+   {"type": "reject"}
+   ```
+
+3. **Edit**: Modify arguments before execution
+   ```python
+   {
+       "type": "edit",
+       "edited_action": {
+           "name": "tool_name",
+           "args": {"modified": "arguments"}
+       }
+   }
+   ```
+
+#### HITL Workflow Example
+
+```python
+import uuid
+from langchain.messages import HumanMessage
+from langgraph.types import Command
+
+# Use thread_id for state persistence (required for HITL)
+thread_id = str(uuid.uuid4())
+thread_config = {"configurable": {"thread_id": thread_id}}
+
+# Initial invocation (`await` must run inside an async function; `agent` is the graph built with make_graph() above)
+result = await agent.ainvoke(
+    {"messages": [HumanMessage(content="What are the core features of LangChain v1?")]},
+    config=thread_config
+)
+
+# Handle interrupts
+while result.get("__interrupt__"):
+    interrupts = result["__interrupt__"][0].value
+    action_requests = interrupts["action_requests"]
+
+    # Make decisions for each action
+    decisions = []
+    for action in action_requests:
+        if action["name"] == "task":
+            decisions.append({"type": "approve"})
+        elif action["name"] == "deep_web_search":
+            decisions.append({"type": "reject"})
+        else:
+            decisions.append({"type": "approve"})
+
+    # Resume with the decisions (must use the same thread_config as the initial call)
+    result = await agent.ainvoke(
+        Command(resume={"decisions": decisions}),
+        config=thread_config
+    )
+
+# Get final result
+final_message = result["messages"][-1]
+print(final_message.content)
+```
+
+#### Key Features Tested
+
+- ✅ Allowed decisions configuration (restrict to approve/reject only)
+- ✅ Top-level tool approval/rejection
+- ✅ Subagent-specific interrupt overrides
+- ✅ Multiple concurrent tool interrupts
+- ✅ Agent resilience when tools are rejected
+- ✅ Verification that rejected tools don't execute
+
+### All Tests
+
+```bash
+# Run all tests across the monorepo
 make test
 ```
 

diff --git a/apps/sample-deep-agent/pyproject.toml b/apps/sample-deep-agent/pyproject.toml
@@ -9,7 +9,7 @@ readme = "README.md"
 license = { text = "MIT" }
 requires-python = ">=3.11,<4.0"
 dependencies = [
-    "deepagents>=0.1.1",
+    "deepagents>=0.1.3",
     "langgraph-up-devkits",
 ]