diff --git a/src/app/App.tsx b/src/app/App.tsx index e785ca7..1bbe871 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -245,18 +245,28 @@ function App() { const instructions = currentAgent?.instructions || ""; const tools = currentAgent?.tools || []; - const sessionUpdateEvent = { - type: "session.update", - session: { - modalities: ["text", "audio"], - instructions, + + const sessionConfig = { + modalities: ["text"], + input_audio_format: "pcm16", + input_audio_transcription: { model: "whisper-1" }, + instructions, + tools, + turn_detection: turnDetection, + }; + + + if (isAudioPlaybackEnabled) { + sessionConfig.modalities.push("audio"); // Changing this to "text" will disable audio + Object.assign(sessionConfig, { voice: "coral", - input_audio_format: "pcm16", output_audio_format: "pcm16", - input_audio_transcription: { model: "whisper-1" }, - turn_detection: turnDetection, - tools, - }, + }); + } + + const sessionUpdateEvent = { + type: "session.update", + session: sessionConfig, }; sendClientEvent(sessionUpdateEvent); diff --git a/src/app/hooks/useHandleServerEvent.ts b/src/app/hooks/useHandleServerEvent.ts index b564cdb..0595f4b 100644 --- a/src/app/hooks/useHandleServerEvent.ts +++ b/src/app/hooks/useHandleServerEvent.ts @@ -139,6 +139,16 @@ export function useHandleServerEvent({ break; } + case "conversation.item.text.delta": + case "response.text.delta": { + const itemId = serverEvent.item_id; + const deltaText = serverEvent.delta || ""; + if (itemId) { + updateTranscriptMessage(itemId, deltaText, true); + } + break; + } + case "conversation.item.input_audio_transcription.completed": { const itemId = serverEvent.item_id; const finalTranscript =