Commit 8bcb389

anuragsharanjuspay authored and sharananurag998 committed
feat: Context handling in realtime
1 parent b205f83 commit 8bcb389

4 files changed: +246 −26 lines changed

docs/voice/pipeline.md (+124 −1)

@@ -103,6 +103,14 @@ from agents.voice import (
     VoicePipelineConfig
 )
 from agents.voice.models.sdk_realtime import SDKRealtimeLLM
+from dataclasses import dataclass
+
+# Define a simple context class for state management (optional)
+@dataclass
+class MyAppContext:
+    """Context for the voice assistant."""
+    user_name: str = "User"
+    interaction_count: int = 0
 
 # Create the input, config, and model
 input_stream = StreamedAudioInput()
@@ -114,11 +122,15 @@ config = VoicePipelineConfig(
 )
 model = SDKRealtimeLLM(model_name="gpt-4o-realtime-preview")
 
-# Create the pipeline with tools
+# Create an app context instance (optional)
+app_context = MyAppContext()
+
+# Create the pipeline with tools and shared context
 pipeline = RealtimeVoicePipeline(
     model=model,
     tools=[get_weather, get_time],
     config=config,
+    shared_context=app_context,  # Optional: shared state for context-aware tools
 )
 
 # Start the pipeline
@@ -147,6 +159,117 @@ while True:
         break
 ```
 
+### Using Shared Context with Tools
+
+The `RealtimeVoicePipeline` supports passing a shared context object to tools, allowing them to access and modify shared state across multiple interactions. This is useful for building more complex voice applications that need to maintain state, such as:
+
+- Tracking user preferences
+- Maintaining conversation history
+- Counting interactions
+- Storing user information
+
+#### Setting up a shared context
+
+To use shared context with tools:
+
+1. Define a context class (typically a dataclass) to hold your application state
+2. Create an instance of this class
+3. Pass it to the `RealtimeVoicePipeline` using the `shared_context` parameter
+4. Create tools that accept a `RunContextWrapper[YourContextType]` as their first parameter
+
+```python
+from dataclasses import dataclass
+from agents.run_context import RunContextWrapper
+from agents.tool import function_tool
+
+# Define your context class
+@dataclass
+class MyAppContext:
+    """Context for the voice assistant."""
+    user_name: str
+    interaction_count: int = 0
+
+# Create a context-aware tool
+@function_tool
+def greet_user_and_count(context: RunContextWrapper[MyAppContext]) -> str:
+    """Greets the user by name and counts interactions."""
+    # Access and modify the context
+    context.context.interaction_count += 1
+
+    return f"Hello {context.context.user_name}! This is interaction number {context.context.interaction_count}."
+
+# Create another context-aware tool
+@function_tool
+def get_user_details(context: RunContextWrapper[MyAppContext]) -> dict:
+    """Gets user details from the context."""
+    return {
+        "user_name": context.context.user_name,
+        "interaction_count": context.context.interaction_count
+    }
+
+# Create your application context
+app_context = MyAppContext(user_name="Alice", interaction_count=0)
+
+# Create the pipeline with shared context
+pipeline = RealtimeVoicePipeline(
+    model=model,
+    tools=[get_weather, get_time, greet_user_and_count, get_user_details],
+    config=config,
+    shared_context=app_context,  # Pass the context here
+)
+```
+
+#### How it works
+
+1. The `RealtimeVoicePipeline` passes the shared context to its internal `ToolExecutor`
+2. When the LLM calls a tool, the `ToolExecutor` checks if the tool's first parameter is named `context`
+3. If it is, the executor wraps your context object in a `RunContextWrapper` and passes it to the tool
+4. The tool can then access and modify your context object via `context.context`
+5. Since all tools share the same context object, changes made by one tool are visible to other tools in future calls
+
+This mechanism allows your tools to maintain shared state across turns and interactions in your voice application, without needing to set up a separate state management system.
+
+#### Context-Aware vs. Standard Tools
+
+You can mix both context-aware and standard tools in the same `RealtimeVoicePipeline`:
+
+```python
+# A standard tool (no context parameter)
+@function_tool
+def get_weather(city: str) -> dict:
+    """Gets the weather for the specified city."""
+    return {"temperature": 72, "condition": "sunny"}
+
+# A context-aware tool (has context parameter)
+@function_tool
+def update_user_preference(context: RunContextWrapper[MyAppContext], preference: str, value: str) -> str:
+    """Updates a user preference in the context."""
+    if not hasattr(context.context, "preferences"):
+        context.context.preferences = {}
+    context.context.preferences[preference] = value
+    return f"Updated {preference} to {value}"
+```
+
+**When to use standard tools:**
+
+- For stateless operations that don't need to remember information between calls
+- For simple lookups or calculations based solely on the input parameters
+- When integration with external APIs or services doesn't require user-specific state
+
+**When to use context-aware tools:**
+
+- When tools need to access or modify shared state
+- For personalization features that adapt to the user
+- To implement features that track usage or interactions
+- When information gathered in one tool call needs to be available to another tool
+
+**Important notes:**
+
+- The first parameter of a context-aware tool must be named `context` and should have a type annotation of `RunContextWrapper[YourContextType]`
+- Type hints are recommended but not required; the parameter name `context` is sufficient for the tool to be detected as context-aware
+- The actual object inside `context.context` will be the instance you passed to `shared_context` when creating the pipeline
+- All context-aware tools see the same context instance, so changes are immediately visible to all tools
+
 ### Turn Detection Modes
 
 The realtime models can operate in different turn detection modes, controlled via the `turn_detection` setting:
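A note on the "How it works" steps documented above: the `ToolExecutor` source is not part of this commit, so the snippet below is only a minimal sketch of the described dispatch rule (a tool whose first parameter is named `context` receives the wrapped shared object). It uses a local stand-in for `RunContextWrapper` and hypothetical helper names, not the SDK's actual implementation.

```python
# Illustrative sketch only -- not the real ToolExecutor. RunContextWrapperStub and
# the helper functions are hypothetical names introduced for this example.
import inspect
from dataclasses import dataclass
from typing import Any, Callable


@dataclass
class RunContextWrapperStub:
    """Stand-in for agents.run_context.RunContextWrapper: just holds the shared object."""
    context: Any


def is_context_aware(tool_fn: Callable[..., Any]) -> bool:
    """Per the docs above, a tool is context-aware when its first parameter is named 'context'."""
    params = list(inspect.signature(tool_fn).parameters)
    return bool(params) and params[0] == "context"


def call_tool(tool_fn: Callable[..., Any], shared_context: Any, **llm_args: Any) -> Any:
    """Inject the wrapped shared context for context-aware tools; call standard tools as-is."""
    if is_context_aware(tool_fn):
        return tool_fn(RunContextWrapperStub(shared_context), **llm_args)
    return tool_fn(**llm_args)


# Two toy tools sharing one mutable context instance (plain functions, no @function_tool).
@dataclass
class MyAppContext:
    user_name: str
    interaction_count: int = 0


def greet_user_and_count(context) -> str:  # context-aware: first param is named 'context'
    context.context.interaction_count += 1
    return f"Hello {context.context.user_name}!"


def get_weather(city: str) -> dict:  # standard tool: no context parameter, nothing injected
    return {"city": city, "temperature": 72}


app_context = MyAppContext(user_name="Alice")
call_tool(greet_user_and_count, app_context)
call_tool(greet_user_and_count, app_context)
call_tool(get_weather, app_context, city="Paris")
assert app_context.interaction_count == 2  # both greetings mutated the same instance
```

Running this confirms the documented behaviour: both greetings mutate the same `MyAppContext` instance via `context.context`, while the standard tool is called unchanged.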

examples/voice/realtime_assistant.py (+53 −4)

@@ -18,14 +18,15 @@
 on applying for access to the realtime API.
 
 Usage:
-    python continuous_realtime_assistant.py
+    python realtime_assistant.py
 """
 
 import asyncio
 import logging
 import os
 import time
 from typing import Dict, Any
+from dataclasses import dataclass
 
 import numpy as np
 import sounddevice as sd  # For microphone and speaker I/O
@@ -42,6 +43,7 @@
 )
 from agents.tool import function_tool, Tool
 from agents.voice.models.sdk_realtime import SDKRealtimeLLM
+from agents.run_context import RunContextWrapper
 
 # Import the new event types from our SDK
 from agents.voice.realtime.model import (
@@ -60,6 +62,15 @@
 logger = logging.getLogger("realtime_assistant")
 
 
+# Define a dataclass for our application context
+@dataclass
+class MyAppContext:
+    """A simple context for the realtime voice assistant example."""
+
+    user_name: str
+    interaction_count: int = 0
+
+
 # Define some sample tools
 @function_tool
 def get_weather(city: str) -> Dict[str, Any]:
@@ -75,6 +86,37 @@ def get_time(timezone: str = "UTC") -> Dict[str, Any]:
     return {"time": time.strftime("%H:%M:%S", time.gmtime()), "timezone": timezone}
 
 
+# Define a context-aware tool
+@function_tool
+def greet_user_and_count(context: RunContextWrapper[MyAppContext]) -> str:
+    """Greets the user by name and counts interactions."""
+    logger.info(f"greet_user_and_count called with context: {context}")
+    # Increment the interaction count
+    context.context.interaction_count += 1
+
+    logger.info(
+        f"Greeting user: {context.context.user_name}, "
+        f"Interaction count: {context.context.interaction_count}"
+    )
+
+    return f"Hello {context.context.user_name}! This is interaction number {context.context.interaction_count}."
+
+
+# Another context-aware tool that reads but doesn't modify the context
+@function_tool
+def get_user_details(context: RunContextWrapper[MyAppContext]) -> Dict[str, Any]:
+    """Gets the user's details from the context."""
+    logger.info(f"get_user_details called with context: {context}")
+
+    logger.info(
+        f"Returning user details: name={context.context.user_name}, count={context.context.interaction_count}"
+    )
+    return {
+        "user_name": context.context.user_name,
+        "interaction_count": context.context.interaction_count,
+    }
+
+
 # Get the OpenAI API key from environment variables
 api_key = os.environ.get("OPENAI_API_KEY")
 if not api_key:
@@ -117,18 +159,22 @@ async def main():
         realtime_settings={
             "turn_detection": "server_vad",  # Use server-side VAD
             "assistant_voice": "alloy",
-            "system_message": "You are a helpful assistant that responds concisely.",
+            "system_message": "You are a helpful assistant that responds concisely. You can use the greet_user_and_count tool to greet the user by name and the get_user_details tool to retrieve information about the user.",
             # Enable server-side noise / echo reduction
             "input_audio_noise_reduction": {},
         }
     )
     input_stream = StreamedAudioInput()
 
-    # Create the realtime pipeline
+    # Create our application context
+    app_context = MyAppContext(user_name="Anurag", interaction_count=0)
+
+    # Create the realtime pipeline with shared context
    pipeline = RealtimeVoicePipeline(
        model=model,
-        tools=[get_weather, get_time],
+        tools=[get_weather, get_time, greet_user_and_count, get_user_details],
        config=config,
+        shared_context=app_context,  # Pass the context to the pipeline
    )
 
     # Track events and errors
@@ -321,6 +367,9 @@ async def toggle_push_to_talk_simulation():
 
     logger.info(f"Total events processed: {event_count}")
 
+    # Print the final interaction count from the context
+    logger.info(f"Final interaction count: {app_context.interaction_count}")
+
     # Provide troubleshooting information if needed
     if error_occurred or event_count <= 1:  # <=1 because turn_started is an event
         logger.error(f"Error occurred: {error_occurred}")

src/agents/voice/pipeline_realtime.py (+4 −1)

@@ -37,6 +37,7 @@ def __init__(
         model: RealtimeLLMModel | str | None = None,
         tools: Sequence[Tool] = (),
         config: VoicePipelineConfig | None = None,
+        shared_context: Any | None = None,
     ):
         """Create a new real-time voice pipeline.
 
@@ -45,6 +46,7 @@ def __init__(
                 or a string identifier for a model from the provider.
             tools: A sequence of tools available to the LLM.
             config: The pipeline configuration. If not provided, a default will be used.
+            shared_context: An optional context object that will be passed to tools when they are executed.
         """
         if isinstance(model, str) or model is None:
             self._model_name_to_load: str | None = model
@@ -59,7 +61,8 @@ def __init__(
 
         self._tools = tools
         self._config = config or VoicePipelineConfig()
-        self._tool_executor = ToolExecutor(tools)
+        self._shared_context = shared_context
+        self._tool_executor = ToolExecutor(tools, shared_context=shared_context)
 
     def _get_model(self) -> RealtimeLLMModel:
         """Get the real-time LLM model to use."""

0 commit comments
