From 56cc4a9152baa16ff4575a8996fa81fe22cb5645 Mon Sep 17 00:00:00 2001 From: Christian Stuff Date: Wed, 17 Dec 2025 08:01:57 +0100 Subject: [PATCH 1/5] implement botOutput event handling with fallback support for older server versions --- package.json | 4 +- package/package.json | 9 +- .../src/components/ConversationProvider.tsx | 52 +-- .../components/elements/TranscriptOverlay.tsx | 44 ++- package/src/components/panels/EventsPanel.tsx | 11 +- package/src/hooks/index.ts | 5 + package/src/hooks/useBotMessages.ts | 324 ++++++++++++++++++ package/src/stores/conversationStore.ts | 2 +- package/src/utils/version.ts | 61 ++++ pnpm-lock.yaml | 83 +++-- 10 files changed, 512 insertions(+), 83 deletions(-) create mode 100644 package/src/hooks/useBotMessages.ts create mode 100644 package/src/utils/version.ts diff --git a/package.json b/package.json index 0b0be1f..700eb2b 100644 --- a/package.json +++ b/package.json @@ -9,8 +9,8 @@ "examples/*" ], "dependencies": { - "@daily-co/daily-js": "^0.84.0", - "@pipecat-ai/client-js": "^1.4.0", + "@daily-co/daily-js": "^0.85.0", + "@pipecat-ai/client-js": "^1.5.0", "@pipecat-ai/client-react": "^1.1.0", "react": "^19.2.1", "react-dom": "^19.2.1", diff --git a/package/package.json b/package/package.json index d163903..8e1dc79 100644 --- a/package/package.json +++ b/package/package.json @@ -103,17 +103,18 @@ "lucide-react": "^0.511.0", "react-chartjs-2": "^5.3.0", "react-resizable-panels": "^3.0.6", + "semver": "^7.6.3", "tailwind-merge": "^3.3.1", "zustand": "^5.0.8" }, "devDependencies": { - "@daily-co/daily-js": "^0.80.0", + "@daily-co/daily-js": "^0.85.0", "@eslint/js": "^9.36.0", "@ladle/react": "^5.0.3", - "@pipecat-ai/client-js": "^1.4.0", + "@pipecat-ai/client-js": "^1.5.0", "@pipecat-ai/client-react": "^1.1.0", - "@pipecat-ai/daily-transport": "^1.4.0", - "@pipecat-ai/small-webrtc-transport": "^1.5.0", + "@pipecat-ai/daily-transport": "^1.5.0", + "@pipecat-ai/small-webrtc-transport": "^1.8.0", "@tailwindcss/vite": "^4.1.13", "@types/node": "^22.18.8", "@types/react": "^19.1.16", diff --git a/package/src/components/ConversationProvider.tsx b/package/src/components/ConversationProvider.tsx index e340821..710f62b 100644 --- a/package/src/components/ConversationProvider.tsx +++ b/package/src/components/ConversationProvider.tsx @@ -3,6 +3,7 @@ import { type ConversationMessage, type ConversationMessagePart, } from "@/types/conversation"; +import { useBotMessages } from "@/hooks/useBotMessages"; import { RTVIEvent } from "@pipecat-ai/client-js"; import { useRTVIClientEvent } from "@pipecat-ai/client-react"; import { createContext, useContext, useRef } from "react"; @@ -29,7 +30,6 @@ export const ConversationProvider = ({ children }: React.PropsWithChildren) => { injectMessage, upsertUserTranscript, updateAssistantText, - startAssistantLlmStream, } = useConversationStore(); const userStoppedTimeout = useRef>(undefined); @@ -39,22 +39,8 @@ export const ConversationProvider = ({ children }: React.PropsWithChildren) => { clearMessages(); }); - useRTVIClientEvent(RTVIEvent.BotLlmStarted, () => { - startAssistantLlmStream(); - // Nudge a reset counter so any consumer logic can infer fresh turn if needed - assistantStreamResetRef.current += 1; - }); - - useRTVIClientEvent(RTVIEvent.BotLlmText, (data) => { - updateAssistantText(data.text, false, "llm"); - }); - - useRTVIClientEvent(RTVIEvent.BotLlmStopped, () => { - finalizeLastMessage("assistant"); - }); - - useRTVIClientEvent(RTVIEvent.BotTtsStarted, () => { - // Start a new assistant message for TTS if there isn't one already in progress + // Helper to ensure assistant message exists + const ensureAssistantMessage = () => { const store = useConversationStore.getState(); const lastAssistantIndex = store.messages.findLastIndex( (msg: ConversationMessage) => msg.role === "assistant", @@ -70,15 +56,37 @@ export const ConversationProvider = ({ children }: React.PropsWithChildren) => { final: false, parts: [], }); + assistantStreamResetRef.current += 1; + return true; } - }); + return false; + }; - useRTVIClientEvent(RTVIEvent.BotTtsText, (data) => { - updateAssistantText(data.text, false, "tts"); + // Use the bot messages hook to handle BotOutput detection and fallback + useBotMessages({ + onBotMessageStarted: () => { + ensureAssistantMessage(); + }, + onBotMessageChunk: (type, text) => { + // The hook handles spacing for BotOutput chunks internally + // For legacy events, spacing is handled by the store for TTS + updateAssistantText(text, false, type); + }, + onBotMessageEnded: () => { + const store = useConversationStore.getState(); + const lastAssistant = store.messages.findLast( + (m: ConversationMessage) => m.role === "assistant", + ); + + if (lastAssistant && !lastAssistant.final) { + finalizeLastMessage("assistant"); + } + }, }); - useRTVIClientEvent(RTVIEvent.BotTtsStopped, () => { - // Finalize the TTS text stream + useRTVIClientEvent(RTVIEvent.BotStoppedSpeaking, () => { + // Finalize the assistant message when bot stops speaking + // This works for both BotOutput and fallback scenarios const store = useConversationStore.getState(); const lastAssistant = store.messages.findLast( (m: ConversationMessage) => m.role === "assistant", diff --git a/package/src/components/elements/TranscriptOverlay.tsx b/package/src/components/elements/TranscriptOverlay.tsx index 193575b..3c0afa8 100644 --- a/package/src/components/elements/TranscriptOverlay.tsx +++ b/package/src/components/elements/TranscriptOverlay.tsx @@ -1,11 +1,12 @@ "use client"; import { cn } from "@/lib/utils"; -import { type BotTTSTextData, RTVIEvent } from "@pipecat-ai/client-js"; +import { RTVIEvent } from "@pipecat-ai/client-js"; import { usePipecatClientTransportState, useRTVIClientEvent, } from "@pipecat-ai/client-react"; +import { useBotMessages } from "@/hooks/useBotMessages"; import { cva } from "class-variance-authority"; import { useCallback, useState } from "react"; @@ -183,11 +184,18 @@ export const TranscriptOverlay = ({ const [turnEnd, setIsTurnEnd] = useState(false); const transportState = usePipecatClientTransportState(); - useRTVIClientEvent( - RTVIEvent.BotTtsText, - useCallback( - (event: BotTTSTextData) => { - if (participant === "local") { + // Use the bot messages hook to handle BotOutput detection and fallback + useBotMessages({ + onBotMessageChunk: (type, text, metadata) => { + if (participant === "local") { + return; + } + + // Only process TTS chunks (spoken content) + if (type === "tts") { + // For BotOutput events, only process word-level chunks + // For legacy events, process all chunks + if (metadata?.aggregated_by && metadata.aggregated_by !== "word") { return; } @@ -196,24 +204,22 @@ export const TranscriptOverlay = ({ setIsTurnEnd(false); } - setTranscript((prev) => [...prev, event.text]); - }, - [turnEnd, participant], - ), - ); - - useRTVIClientEvent( - RTVIEvent.BotStoppedSpeaking, - useCallback(() => { + setTranscript((prev) => [...prev, text]); + } + }, + onBotMessageEnded: (type) => { if (participant === "local") { return; } - setIsTurnEnd(true); - }, [participant]), - ); + // Only handle TTS ended events + if (type === "tts") { + setIsTurnEnd(true); + } + }, + }); useRTVIClientEvent( - RTVIEvent.BotTtsStopped, + RTVIEvent.BotStoppedSpeaking, useCallback(() => { if (participant === "local") { return; diff --git a/package/src/components/panels/EventsPanel.tsx b/package/src/components/panels/EventsPanel.tsx index fc6081c..051846e 100644 --- a/package/src/components/panels/EventsPanel.tsx +++ b/package/src/components/panels/EventsPanel.tsx @@ -6,7 +6,8 @@ import { PanelTitle, } from "@/components/ui/panel"; import { cn } from "@/lib/utils"; -import { RTVIEvent } from "@pipecat-ai/client-js"; +import { BotOutputData, RTVIEvent } from "@pipecat-ai/client-js"; + import { usePipecatClient, usePipecatClientTransportState, @@ -105,6 +106,14 @@ export const EventsPanel: React.FC = ({ collapsed = false }) => { }); }); + useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => { + addEvent({ + event: RTVIEvent.BotOutput, + message: `Bot output (${data.aggregated_by}, spoken: ${data.spoken}): ${data.text}`, + time: new Date().toLocaleTimeString(), + }); + }); + useRTVIClientEvent(RTVIEvent.Connected, () => { addEvent({ event: RTVIEvent.Connected, diff --git a/package/src/hooks/index.ts b/package/src/hooks/index.ts index 6de7b03..35a699d 100644 --- a/package/src/hooks/index.ts +++ b/package/src/hooks/index.ts @@ -13,3 +13,8 @@ export type { UsePipecatEventStreamOptions, } from "./usePipecatEventStream"; export { useTheme } from "./useTheme"; +export { useBotMessages } from "./useBotMessages"; +export type { + UseBotMessagesCallbacks, + BotMessageChunkMetadata, +} from "./useBotMessages"; diff --git a/package/src/hooks/useBotMessages.ts b/package/src/hooks/useBotMessages.ts new file mode 100644 index 0000000..f380ac9 --- /dev/null +++ b/package/src/hooks/useBotMessages.ts @@ -0,0 +1,324 @@ +import { BotOutputData, BotReadyData, RTVIEvent } from "@pipecat-ai/client-js"; +import { useRTVIClientEvent } from "@pipecat-ai/client-react"; +import { useCallback, useRef, useState } from "react"; +import { isMinVersion } from "@/utils/version"; + +/** + * Cached data from old BotTts/BotLlm events during probe period + */ +interface CachedBotEvents { + llmText: string; + ttsText: string; + llmStarted: boolean; + ttsStarted: boolean; + llmStopped: boolean; + ttsStopped: boolean; +} + +/** + * Metadata for bot message chunks (only available for BotOutput events) + */ +export interface BotMessageChunkMetadata { + /** + * The aggregation type used for this output (e.g., "sentence", "word") + */ + aggregated_by?: "sentence" | "word" | string; +} + +/** + * Unified callbacks for handling bot message events. + * The distinction between BotOutput and legacy events is handled internally. + */ +export interface UseBotMessagesCallbacks { + /** + * Called when a bot message stream starts for a given type. + * @param type - The message type: "llm" for unspoken content, "tts" for spoken content + */ + onBotMessageStarted?: (type: "llm" | "tts") => void; + /** + * Called for each text chunk in a bot message stream. + * @param type - The message type: "llm" for unspoken content, "tts" for spoken content + * @param text - The text chunk + * @param metadata - Optional metadata (only available for BotOutput events) + */ + onBotMessageChunk?: ( + type: "llm" | "tts", + text: string, + metadata?: BotMessageChunkMetadata, + ) => void; + /** + * Called when a bot message stream ends for a given type. + * @param type - The message type: "llm" for unspoken content, "tts" for spoken content + */ + onBotMessageEnded?: (type: "llm" | "tts") => void; +} + +/** + * Hook for handling bot messages with automatic BotOutput support detection and fallback. + * + * This hook automatically detects whether the server supports BotOutput events by checking + * the BotReady event's library version information. BotOutput is supported in pipecat 0.0.98+. + * During the period before BotReady is received, it caches old event data. Once support is + * determined, it applies cached data and routes events accordingly. + * + * @param callbacks - Callback functions for handling bot message events + * @returns Object containing botOutputSupported status + */ +export function useBotMessages(callbacks: UseBotMessagesCallbacks) { + const [botOutputSupported, setBotOutputSupported] = useState( + null, + ); // null = unknown, true = supported, false = not supported + + const cachedEventsRef = useRef({ + llmText: "", + ttsText: "", + llmStarted: false, + ttsStarted: false, + llmStopped: false, + ttsStopped: false, + }); + + // Track message stream state for BotOutput events + const botOutputStreamStateRef = useRef<{ + llmStarted: boolean; + ttsStarted: boolean; + lastChunkText: { llm: string; tts: string }; + }>({ + llmStarted: false, + ttsStarted: false, + lastChunkText: { llm: "", tts: "" }, + }); + + // Reset state on connection + useRTVIClientEvent(RTVIEvent.Connected, () => { + setBotOutputSupported(null); + cachedEventsRef.current = { + llmText: "", + ttsText: "", + llmStarted: false, + ttsStarted: false, + llmStopped: false, + ttsStopped: false, + }; + botOutputStreamStateRef.current = { + llmStarted: false, + ttsStarted: false, + lastChunkText: { llm: "", tts: "" }, + }; + }); + + // Check BotOutput support from BotReady event + useRTVIClientEvent(RTVIEvent.BotReady, (botData: BotReadyData) => { + // Type guard to check if about has the expected structure + const about = + botData.about && + typeof botData.about === "object" && + "library" in botData.about && + "library_version" in botData.about + ? (botData.about as { library: string; library_version: string }) + : undefined; + + // Check if library information is available + if (about?.library && about?.library_version) { + // BotOutput is supported in pipecat 0.0.98+ + const supportsBotOutput = isMinVersion(about.library_version, [0, 0, 98]); + setBotOutputSupported(supportsBotOutput); + + // If we determined support and have cached data, apply it + if (supportsBotOutput && cachedEventsRef.current) { + applyCachedData(); + // Reset stream state after applying cached data + botOutputStreamStateRef.current = { + llmStarted: false, + ttsStarted: false, + lastChunkText: { llm: "", tts: "" }, + }; + } else if (!supportsBotOutput) { + // If BotOutput is not supported, apply cached data as legacy events + applyCachedData(); + } + } else { + // No library information available - assume BotOutput is not supported + setBotOutputSupported(false); + // Apply any cached data as legacy events + applyCachedData(); + } + }); + + // Apply cached data helper + const applyCachedData = useCallback(() => { + const cached = cachedEventsRef.current; + + // Apply cached started events + if (cached.llmStarted && callbacks.onBotMessageStarted) { + callbacks.onBotMessageStarted("llm"); + } + if (cached.ttsStarted && callbacks.onBotMessageStarted) { + callbacks.onBotMessageStarted("tts"); + } + + // Apply cached text chunks + if (cached.llmText && callbacks.onBotMessageChunk) { + callbacks.onBotMessageChunk("llm", cached.llmText); + } + if (cached.ttsText && callbacks.onBotMessageChunk) { + callbacks.onBotMessageChunk("tts", cached.ttsText); + } + + // Apply cached ended events + if (cached.llmStopped && callbacks.onBotMessageEnded) { + callbacks.onBotMessageEnded("llm"); + } + if (cached.ttsStopped && callbacks.onBotMessageEnded) { + callbacks.onBotMessageEnded("tts"); + } + + // Clear cache after applying + cachedEventsRef.current = { + llmText: "", + ttsText: "", + llmStarted: false, + ttsStarted: false, + llmStopped: false, + ttsStopped: false, + }; + }, [callbacks]); + + // BotOutput handler - maps to unified callbacks + useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => { + // Only process BotOutput if it's supported + if (botOutputSupported === true) { + // Derive message type from BotOutput data + const type: "llm" | "tts" = data.spoken ? "tts" : "llm"; + + // Check if this is the first BotOutput for this type in the current turn + const streamState = botOutputStreamStateRef.current; + const isFirstForType = + (type === "llm" && !streamState.llmStarted) || + (type === "tts" && !streamState.ttsStarted); + + if (isFirstForType && callbacks.onBotMessageStarted) { + callbacks.onBotMessageStarted(type); + if (type === "llm") { + streamState.llmStarted = true; + } else { + streamState.ttsStarted = true; + } + } + + // Process the text chunk with proper spacing for BotOutput + if (callbacks.onBotMessageChunk && data.text) { + const lastChunk = botOutputStreamStateRef.current.lastChunkText[type]; + let textToSend = data.text; + + // Add space separator if needed between BotOutput chunks + if ( + lastChunk && + !lastChunk.endsWith(" ") && + !textToSend.startsWith(" ") + ) { + textToSend = " " + textToSend; + } + + // Include metadata for BotOutput events + const metadata: BotMessageChunkMetadata = { + aggregated_by: data.aggregated_by, + }; + + callbacks.onBotMessageChunk(type, textToSend, metadata); + botOutputStreamStateRef.current.lastChunkText[type] = textToSend; + } + + // If this is a sentence-level output, it might indicate completion + // However, we'll rely on BotStoppedSpeaking for definitive end detection + } + }); + + // Handle BotStoppedSpeaking to signal end of message streams + useRTVIClientEvent(RTVIEvent.BotStoppedSpeaking, () => { + if (botOutputSupported === true) { + // For BotOutput, signal end for any active streams + const streamState = botOutputStreamStateRef.current; + if (streamState.llmStarted && callbacks.onBotMessageEnded) { + callbacks.onBotMessageEnded("llm"); + } + if (streamState.ttsStarted && callbacks.onBotMessageEnded) { + callbacks.onBotMessageEnded("tts"); + } + // Reset stream state + botOutputStreamStateRef.current = { + llmStarted: false, + ttsStarted: false, + lastChunkText: { llm: "", tts: "" }, + }; + } + }); + + // Handle legacy BotLlmStarted events + useRTVIClientEvent(RTVIEvent.BotLlmStarted, () => { + // Handle the event based on support status + if (botOutputSupported === false && callbacks.onBotMessageStarted) { + callbacks.onBotMessageStarted("llm"); + } else if (botOutputSupported === null) { + // Cache during period before BotReady is received + cachedEventsRef.current.llmStarted = true; + } + }); + + useRTVIClientEvent(RTVIEvent.BotLlmText, (data) => { + if (botOutputSupported === false && callbacks.onBotMessageChunk) { + callbacks.onBotMessageChunk("llm", data.text); + } else if (botOutputSupported === null) { + // Cache during period before BotReady is received with proper spacing + const cached = cachedEventsRef.current.llmText; + cachedEventsRef.current.llmText += + (cached && !cached.endsWith(" ") && !data.text.startsWith(" ") + ? " " + : "") + data.text; + } + }); + + useRTVIClientEvent(RTVIEvent.BotLlmStopped, () => { + if (botOutputSupported === false && callbacks.onBotMessageEnded) { + callbacks.onBotMessageEnded("llm"); + } else if (botOutputSupported === null) { + // Cache during period before BotReady is received + cachedEventsRef.current.llmStopped = true; + } + }); + + useRTVIClientEvent(RTVIEvent.BotTtsStarted, () => { + if (botOutputSupported === false && callbacks.onBotMessageStarted) { + callbacks.onBotMessageStarted("tts"); + } else if (botOutputSupported === null) { + // Cache during period before BotReady is received + cachedEventsRef.current.ttsStarted = true; + } + }); + + useRTVIClientEvent(RTVIEvent.BotTtsText, (data) => { + if (botOutputSupported === false && callbacks.onBotMessageChunk) { + callbacks.onBotMessageChunk("tts", data.text); + } else if (botOutputSupported === null) { + // Cache during period before BotReady is received with proper spacing + const cached = cachedEventsRef.current.ttsText; + cachedEventsRef.current.ttsText += + (cached && !cached.endsWith(" ") && !data.text.startsWith(" ") + ? " " + : "") + data.text; + } + }); + + useRTVIClientEvent(RTVIEvent.BotTtsStopped, () => { + if (botOutputSupported === false && callbacks.onBotMessageEnded) { + callbacks.onBotMessageEnded("tts"); + } else if (botOutputSupported === null) { + // Cache during period before BotReady is received + cachedEventsRef.current.ttsStopped = true; + } + }); + + return { + botOutputSupported, + }; +} diff --git a/package/src/stores/conversationStore.ts b/package/src/stores/conversationStore.ts index 23b494e..248e8c1 100644 --- a/package/src/stores/conversationStore.ts +++ b/package/src/stores/conversationStore.ts @@ -38,7 +38,7 @@ interface ConversationState { updateAssistantText: ( text: string, final: boolean, - source: "llm" | "tts", + source: "llm" | "tts", // Derived from BotOutput.spoken: "llm" if false, "tts" if true ) => void; startAssistantLlmStream: () => void; } diff --git a/package/src/utils/version.ts b/package/src/utils/version.ts new file mode 100644 index 0000000..e169ad2 --- /dev/null +++ b/package/src/utils/version.ts @@ -0,0 +1,61 @@ +/** + * Version comparison utilities for semantic versioning using the semver package + */ + +import semver from "semver"; + +/** + * Converts a version array [major, minor, patch] to a semver string + */ +function versionArrayToString(version: [number, number, number]): string { + return `${version[0]}.${version[1]}.${version[2]}`; +} + +/** + * Checks if a version meets a minimum version requirement. + * @param currentVersion - The current version string (e.g., "0.0.98") + * @param minVersion - The minimum version as an array [major, minor, patch] + * @returns true if currentVersion >= minVersion + */ +export function isMinVersion( + currentVersion: string, + minVersion: [number, number, number], +): boolean { + const minVersionStr = versionArrayToString(minVersion); + return semver.gte(currentVersion, minVersionStr); +} + +/** + * Checks if a version is below a maximum version. + * @param currentVersion - The current version string (e.g., "0.0.98") + * @param maxVersion - The maximum version as an array [major, minor, patch] + * @returns true if currentVersion <= maxVersion + */ +export function isMaxVersion( + currentVersion: string, + maxVersion: [number, number, number], +): boolean { + const maxVersionStr = versionArrayToString(maxVersion); + return semver.lte(currentVersion, maxVersionStr); +} + +/** + * Checks if a version is within a version range (inclusive). + * @param currentVersion - The current version string (e.g., "0.0.98") + * @param minVersion - The minimum version as an array [major, minor, patch] + * @param maxVersion - The maximum version as an array [major, minor, patch] + * @returns true if minVersion <= currentVersion <= maxVersion + */ +export function isVersionInRange( + currentVersion: string, + minVersion: [number, number, number], + maxVersion: [number, number, number], +): boolean { + const minVersionStr = versionArrayToString(minVersion); + const maxVersionStr = versionArrayToString(maxVersion); + // Use semver's satisfies with a range + return semver.satisfies( + currentVersion, + `>=${minVersionStr} <=${maxVersionStr}`, + ); +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 22b93ea..aebc984 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,14 +9,14 @@ importers: .: dependencies: '@daily-co/daily-js': - specifier: ^0.84.0 - version: 0.84.0 + specifier: ^0.85.0 + version: 0.85.0 '@pipecat-ai/client-js': - specifier: ^1.4.0 - version: 1.4.0 + specifier: ^1.5.0 + version: 1.5.0 '@pipecat-ai/client-react': specifier: ^1.1.0 - version: 1.1.0(@babel/core@7.28.4)(@babel/template@7.27.2)(@pipecat-ai/client-js@1.4.0)(@types/react@19.1.16)(react-dom@19.2.1(react@19.2.1))(react@19.2.1) + version: 1.1.0(@babel/core@7.28.4)(@babel/template@7.27.2)(@pipecat-ai/client-js@1.5.0)(@types/react@19.1.16)(react-dom@19.2.1(react@19.2.1))(react@19.2.1) react: specifier: ^19.2.1 version: 19.2.1 @@ -50,7 +50,7 @@ importers: version: 2.20.0(react-dom@19.2.1(react@19.2.1))(react@19.2.1) '@pipecat-ai/small-webrtc-transport': specifier: ^1.5.0 - version: 1.5.0(@pipecat-ai/client-js@1.4.0) + version: 1.5.0(@pipecat-ai/client-js@1.5.0) '@pipecat-ai/voice-ui-kit': specifier: workspace:* version: link:../package @@ -399,6 +399,9 @@ importers: react-resizable-panels: specifier: ^3.0.6 version: 3.0.6(react-dom@19.2.1(react@19.2.1))(react@19.2.1) + semver: + specifier: ^7.6.3 + version: 7.7.2 tailwind-merge: specifier: ^3.3.1 version: 3.3.1 @@ -407,8 +410,8 @@ importers: version: 5.0.8(@types/react@19.1.16)(react@19.2.1) devDependencies: '@daily-co/daily-js': - specifier: ^0.80.0 - version: 0.80.0 + specifier: ^0.85.0 + version: 0.85.0 '@eslint/js': specifier: ^9.36.0 version: 9.36.0 @@ -416,17 +419,17 @@ importers: specifier: ^5.0.3 version: 5.0.3(@types/node@22.18.8)(@types/react@19.1.16)(jiti@2.6.0)(lightningcss@1.30.1)(react-dom@19.2.1(react@19.2.1))(react@19.2.1)(tsx@4.20.5)(typescript@5.8.3) '@pipecat-ai/client-js': - specifier: ^1.4.0 - version: 1.4.0 + specifier: ^1.5.0 + version: 1.5.0 '@pipecat-ai/client-react': specifier: ^1.1.0 - version: 1.1.0(@babel/core@7.28.4)(@babel/template@7.27.2)(@pipecat-ai/client-js@1.4.0)(@types/react@19.1.16)(react-dom@19.2.1(react@19.2.1))(react@19.2.1) + version: 1.1.0(@babel/core@7.28.4)(@babel/template@7.27.2)(@pipecat-ai/client-js@1.5.0)(@types/react@19.1.16)(react-dom@19.2.1(react@19.2.1))(react@19.2.1) '@pipecat-ai/daily-transport': - specifier: ^1.4.0 - version: 1.4.0(@pipecat-ai/client-js@1.4.0) - '@pipecat-ai/small-webrtc-transport': specifier: ^1.5.0 - version: 1.5.0(@pipecat-ai/client-js@1.4.0) + version: 1.5.0(@pipecat-ai/client-js@1.5.0) + '@pipecat-ai/small-webrtc-transport': + specifier: ^1.8.0 + version: 1.8.0(@pipecat-ai/client-js@1.5.0) '@tailwindcss/vite': specifier: ^4.1.13 version: 4.1.13(vite@7.1.7(@types/node@22.18.8)(jiti@2.6.0)(lightningcss@1.30.1)(tsx@4.20.5)) @@ -629,10 +632,6 @@ packages: react: ^16.8.0 || ^17 || ^18 || ^19 react-dom: ^16.8.0 || ^17 || ^18 || ^19 - '@daily-co/daily-js@0.80.0': - resolution: {integrity: sha512-zG2NBbKbHfm56P0lg4ddC94vBtn5AQKcgvbYrO5+ohNWPSolMqlJiYxQC9uhOHfFYRhH4ELKQ6NHqGatX9VD7A==} - engines: {node: '>=10.0.0'} - '@daily-co/daily-js@0.83.1': resolution: {integrity: sha512-KXA3zrSnNPZONwhip4TI6ayD3huumI1QBD/xPAjFArvHuFFB7t0QTOS2oxH1bXvdOBJ6XnY08vdlJslTVi2/2w==} engines: {node: '>=10.0.0'} @@ -641,6 +640,10 @@ packages: resolution: {integrity: sha512-/ynXrMDDkRXhLlHxiFNf9QU5yw4ZGPr56wNARgja/Tiid71UIniundTavCNF5cMb2I1vNoMh7oEJ/q8stg/V7g==} engines: {node: '>=10.0.0'} + '@daily-co/daily-js@0.85.0': + resolution: {integrity: sha512-lpl111ZWNTUWDnwYcPuNi9PGJPbLCeCw6LzmEY40nG0hv1jg5JLVW8Rq3Cj/+lOCP6W6h4PXm211ss0FFnxITQ==} + engines: {node: '>=10.0.0'} + '@dimforge/rapier3d-compat@0.12.0': resolution: {integrity: sha512-uekIGetywIgopfD97oDL5PfeezkFpNhwlzlaEYNOA0N6ghdsOvh/HYjSMek5Q2O1PYvRSDFcqFVJl4r4ZBwOow==} @@ -1450,8 +1453,8 @@ packages: resolution: {integrity: sha512-Szki0cgFiXE5F9RLx2lUyEtJllnuCSQ4B8RLDwIjXkVit6qZjoDAxH+xhJs29MjKLDz0tbPLdKFa6QrQ/qoGGA==} engines: {node: '>= 20.0.0'} - '@pipecat-ai/client-js@1.4.0': - resolution: {integrity: sha512-HOWz4OWXbIetkywwrfVuEiEpU8FVFksmyP/rSWHSQGu/V2MJflAXfFt2tQmtu8RiQCA7C1qurfZGsxeccYmJXw==} + '@pipecat-ai/client-js@1.5.0': + resolution: {integrity: sha512-WE8n5P98XoOyEX+k6DkcxBFaeoP82rDl3BEm4Zxf8tw1Umw5zFsv73qR6ztU4D5WDbYiscA3J+hGfe+g8PSUIw==} '@pipecat-ai/client-react@1.1.0': resolution: {integrity: sha512-OfXd2vd8ooOE1I/7SwVnSPXz2UC/L2GlAMYVO40ClzoeeJidywcNfbqTO2ZDxcxvFf15tv4O9aAM3uPo4Ou13A==} @@ -1460,16 +1463,21 @@ packages: react: '>=18' react-dom: '>=18' - '@pipecat-ai/daily-transport@1.4.0': - resolution: {integrity: sha512-zGfrpOAjyE7oSgcYhFp70Ut6flRgBdpzgAPbSsdLgHjWDD9zHPG4OkSrQg+/y5YabWX623ulD/321EKgKZamPg==} + '@pipecat-ai/daily-transport@1.5.0': + resolution: {integrity: sha512-dVe8XQjBxsQBWR3/567ClJYoEMIMN2+8gcdkbTGoK2EfQqnKIAxmIQo/o3hXzBrZe/PNzvoMOap7o7w2OnPQXQ==} peerDependencies: - '@pipecat-ai/client-js': ~1.4.0 + '@pipecat-ai/client-js': ~1.5.0 '@pipecat-ai/small-webrtc-transport@1.5.0': resolution: {integrity: sha512-kGebGYD60U9A7wKskw9pRw3XZUZ+tepumqc5ObaST8tzM1O+oroDOriPxEtMaOVEglAyOg+7gXYUBDexK2Rp3Q==} peerDependencies: '@pipecat-ai/client-js': ~1.4.0 + '@pipecat-ai/small-webrtc-transport@1.8.0': + resolution: {integrity: sha512-TaBAA0Esa2DEFBlUmOMw/cxPvaHB/UiKb6OVLvduke0ZUzjk2b1h6C3OHwTyszIkBNnqk9AEErqfFgImImGr2A==} + peerDependencies: + '@pipecat-ai/client-js': ~1.5.0 + '@pkgjs/parseargs@0.11.0': resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} engines: {node: '>=14'} @@ -6201,7 +6209,7 @@ snapshots: react-dom: 19.2.1(react@19.2.1) react-is: 17.0.2 - '@daily-co/daily-js@0.80.0': + '@daily-co/daily-js@0.83.1': dependencies: '@babel/runtime': 7.28.4 '@sentry/browser': 8.55.0 @@ -6209,7 +6217,7 @@ snapshots: dequal: 2.0.3 events: 3.3.0 - '@daily-co/daily-js@0.83.1': + '@daily-co/daily-js@0.84.0': dependencies: '@babel/runtime': 7.28.4 '@sentry/browser': 8.55.0 @@ -6217,7 +6225,7 @@ snapshots: dequal: 2.0.3 events: 3.3.0 - '@daily-co/daily-js@0.84.0': + '@daily-co/daily-js@0.85.0': dependencies: '@babel/runtime': 7.28.4 '@sentry/browser': 8.55.0 @@ -6942,7 +6950,7 @@ snapshots: '@orama/orama@3.1.11': {} - '@pipecat-ai/client-js@1.4.0': + '@pipecat-ai/client-js@1.5.0': dependencies: '@types/events': 3.0.3 bowser: 2.12.1 @@ -6951,9 +6959,9 @@ snapshots: typed-emitter: 2.1.0 uuid: 10.0.0 - '@pipecat-ai/client-react@1.1.0(@babel/core@7.28.4)(@babel/template@7.27.2)(@pipecat-ai/client-js@1.4.0)(@types/react@19.1.16)(react-dom@19.2.1(react@19.2.1))(react@19.2.1)': + '@pipecat-ai/client-react@1.1.0(@babel/core@7.28.4)(@babel/template@7.27.2)(@pipecat-ai/client-js@1.5.0)(@types/react@19.1.16)(react-dom@19.2.1(react@19.2.1))(react@19.2.1)': dependencies: - '@pipecat-ai/client-js': 1.4.0 + '@pipecat-ai/client-js': 1.5.0 jotai: 2.15.0(@babel/core@7.28.4)(@babel/template@7.27.2)(@types/react@19.1.16)(react@19.2.1) react: 19.2.1 react-dom: 19.2.1(react@19.2.1) @@ -6962,15 +6970,22 @@ snapshots: - '@babel/template' - '@types/react' - '@pipecat-ai/daily-transport@1.4.0(@pipecat-ai/client-js@1.4.0)': + '@pipecat-ai/daily-transport@1.5.0(@pipecat-ai/client-js@1.5.0)': + dependencies: + '@daily-co/daily-js': 0.84.0 + '@pipecat-ai/client-js': 1.5.0 + + '@pipecat-ai/small-webrtc-transport@1.5.0(@pipecat-ai/client-js@1.5.0)': dependencies: '@daily-co/daily-js': 0.83.1 - '@pipecat-ai/client-js': 1.4.0 + '@pipecat-ai/client-js': 1.5.0 + dequal: 2.0.3 + lodash: 4.17.21 - '@pipecat-ai/small-webrtc-transport@1.5.0(@pipecat-ai/client-js@1.4.0)': + '@pipecat-ai/small-webrtc-transport@1.8.0(@pipecat-ai/client-js@1.5.0)': dependencies: '@daily-co/daily-js': 0.83.1 - '@pipecat-ai/client-js': 1.4.0 + '@pipecat-ai/client-js': 1.5.0 dequal: 2.0.3 lodash: 4.17.21 From 174f244120af388e0bdc4e9c4ef532cc36d64ad3 Mon Sep 17 00:00:00 2001 From: Christian Stuff Date: Thu, 18 Dec 2025 13:18:37 +0100 Subject: [PATCH 2/5] update callback checks --- package/src/hooks/useBotMessages.ts | 66 ++++++++++++++--------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/package/src/hooks/useBotMessages.ts b/package/src/hooks/useBotMessages.ts index f380ac9..c8e9343 100644 --- a/package/src/hooks/useBotMessages.ts +++ b/package/src/hooks/useBotMessages.ts @@ -125,7 +125,7 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { setBotOutputSupported(supportsBotOutput); // If we determined support and have cached data, apply it - if (supportsBotOutput && cachedEventsRef.current) { + if (supportsBotOutput) { applyCachedData(); // Reset stream state after applying cached data botOutputStreamStateRef.current = { @@ -150,27 +150,27 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { const cached = cachedEventsRef.current; // Apply cached started events - if (cached.llmStarted && callbacks.onBotMessageStarted) { - callbacks.onBotMessageStarted("llm"); + if (cached.llmStarted) { + callbacks.onBotMessageStarted?.("llm"); } - if (cached.ttsStarted && callbacks.onBotMessageStarted) { - callbacks.onBotMessageStarted("tts"); + if (cached.ttsStarted) { + callbacks.onBotMessageStarted?.("tts"); } // Apply cached text chunks - if (cached.llmText && callbacks.onBotMessageChunk) { - callbacks.onBotMessageChunk("llm", cached.llmText); + if (cached.llmText) { + callbacks.onBotMessageChunk?.("llm", cached.llmText); } - if (cached.ttsText && callbacks.onBotMessageChunk) { - callbacks.onBotMessageChunk("tts", cached.ttsText); + if (cached.ttsText) { + callbacks.onBotMessageChunk?.("tts", cached.ttsText); } // Apply cached ended events - if (cached.llmStopped && callbacks.onBotMessageEnded) { - callbacks.onBotMessageEnded("llm"); + if (cached.llmStopped) { + callbacks.onBotMessageEnded?.("llm"); } - if (cached.ttsStopped && callbacks.onBotMessageEnded) { - callbacks.onBotMessageEnded("tts"); + if (cached.ttsStopped) { + callbacks.onBotMessageEnded?.("tts"); } // Clear cache after applying @@ -197,8 +197,8 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { (type === "llm" && !streamState.llmStarted) || (type === "tts" && !streamState.ttsStarted); - if (isFirstForType && callbacks.onBotMessageStarted) { - callbacks.onBotMessageStarted(type); + if (isFirstForType) { + callbacks.onBotMessageStarted?.(type); if (type === "llm") { streamState.llmStarted = true; } else { @@ -207,7 +207,7 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { } // Process the text chunk with proper spacing for BotOutput - if (callbacks.onBotMessageChunk && data.text) { + if (data.text) { const lastChunk = botOutputStreamStateRef.current.lastChunkText[type]; let textToSend = data.text; @@ -225,7 +225,7 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { aggregated_by: data.aggregated_by, }; - callbacks.onBotMessageChunk(type, textToSend, metadata); + callbacks.onBotMessageChunk?.(type, textToSend, metadata); botOutputStreamStateRef.current.lastChunkText[type] = textToSend; } @@ -239,11 +239,11 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { if (botOutputSupported === true) { // For BotOutput, signal end for any active streams const streamState = botOutputStreamStateRef.current; - if (streamState.llmStarted && callbacks.onBotMessageEnded) { - callbacks.onBotMessageEnded("llm"); + if (streamState.llmStarted) { + callbacks.onBotMessageEnded?.("llm"); } - if (streamState.ttsStarted && callbacks.onBotMessageEnded) { - callbacks.onBotMessageEnded("tts"); + if (streamState.ttsStarted) { + callbacks.onBotMessageEnded?.("tts"); } // Reset stream state botOutputStreamStateRef.current = { @@ -257,8 +257,8 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { // Handle legacy BotLlmStarted events useRTVIClientEvent(RTVIEvent.BotLlmStarted, () => { // Handle the event based on support status - if (botOutputSupported === false && callbacks.onBotMessageStarted) { - callbacks.onBotMessageStarted("llm"); + if (botOutputSupported === false) { + callbacks.onBotMessageStarted?.("llm"); } else if (botOutputSupported === null) { // Cache during period before BotReady is received cachedEventsRef.current.llmStarted = true; @@ -266,8 +266,8 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { }); useRTVIClientEvent(RTVIEvent.BotLlmText, (data) => { - if (botOutputSupported === false && callbacks.onBotMessageChunk) { - callbacks.onBotMessageChunk("llm", data.text); + if (botOutputSupported === false) { + callbacks.onBotMessageChunk?.("llm", data.text); } else if (botOutputSupported === null) { // Cache during period before BotReady is received with proper spacing const cached = cachedEventsRef.current.llmText; @@ -279,8 +279,8 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { }); useRTVIClientEvent(RTVIEvent.BotLlmStopped, () => { - if (botOutputSupported === false && callbacks.onBotMessageEnded) { - callbacks.onBotMessageEnded("llm"); + if (botOutputSupported === false) { + callbacks.onBotMessageEnded?.("llm"); } else if (botOutputSupported === null) { // Cache during period before BotReady is received cachedEventsRef.current.llmStopped = true; @@ -288,8 +288,8 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { }); useRTVIClientEvent(RTVIEvent.BotTtsStarted, () => { - if (botOutputSupported === false && callbacks.onBotMessageStarted) { - callbacks.onBotMessageStarted("tts"); + if (botOutputSupported === false) { + callbacks.onBotMessageStarted?.("tts"); } else if (botOutputSupported === null) { // Cache during period before BotReady is received cachedEventsRef.current.ttsStarted = true; @@ -297,8 +297,8 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { }); useRTVIClientEvent(RTVIEvent.BotTtsText, (data) => { - if (botOutputSupported === false && callbacks.onBotMessageChunk) { - callbacks.onBotMessageChunk("tts", data.text); + if (botOutputSupported === false) { + callbacks.onBotMessageChunk?.("tts", data.text); } else if (botOutputSupported === null) { // Cache during period before BotReady is received with proper spacing const cached = cachedEventsRef.current.ttsText; @@ -310,8 +310,8 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { }); useRTVIClientEvent(RTVIEvent.BotTtsStopped, () => { - if (botOutputSupported === false && callbacks.onBotMessageEnded) { - callbacks.onBotMessageEnded("tts"); + if (botOutputSupported === false) { + callbacks.onBotMessageEnded?.("tts"); } else if (botOutputSupported === null) { // Cache during period before BotReady is received cachedEventsRef.current.ttsStopped = true; From db6ea4f319704b93b8026b2aeacab55a9fb9b4ae Mon Sep 17 00:00:00 2001 From: Christian Stuff Date: Fri, 19 Dec 2025 08:34:38 +0100 Subject: [PATCH 3/5] simplify useBotMessages - remove unnecessary caching of llm/tts events - simplify botOutputSupported checks --- package/src/hooks/useBotMessages.ts | 248 ++++++---------------------- 1 file changed, 55 insertions(+), 193 deletions(-) diff --git a/package/src/hooks/useBotMessages.ts b/package/src/hooks/useBotMessages.ts index c8e9343..950f034 100644 --- a/package/src/hooks/useBotMessages.ts +++ b/package/src/hooks/useBotMessages.ts @@ -1,20 +1,8 @@ import { BotOutputData, BotReadyData, RTVIEvent } from "@pipecat-ai/client-js"; import { useRTVIClientEvent } from "@pipecat-ai/client-react"; -import { useCallback, useRef, useState } from "react"; +import { useRef, useState } from "react"; import { isMinVersion } from "@/utils/version"; -/** - * Cached data from old BotTts/BotLlm events during probe period - */ -interface CachedBotEvents { - llmText: string; - ttsText: string; - llmStarted: boolean; - ttsStarted: boolean; - llmStopped: boolean; - ttsStopped: boolean; -} - /** * Metadata for bot message chunks (only available for BotOutput events) */ @@ -57,26 +45,14 @@ export interface UseBotMessagesCallbacks { * Hook for handling bot messages with automatic BotOutput support detection and fallback. * * This hook automatically detects whether the server supports BotOutput events by checking - * the BotReady event's library version information. BotOutput is supported in pipecat 0.0.98+. - * During the period before BotReady is received, it caches old event data. Once support is - * determined, it applies cached data and routes events accordingly. + * the BotReady event's RTVI protocol version. BotOutput is supported in RTVI 1.1.0+. + * Once support is determined, it routes events accordingly. * * @param callbacks - Callback functions for handling bot message events * @returns Object containing botOutputSupported status */ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { - const [botOutputSupported, setBotOutputSupported] = useState( - null, - ); // null = unknown, true = supported, false = not supported - - const cachedEventsRef = useRef({ - llmText: "", - ttsText: "", - llmStarted: false, - ttsStarted: false, - llmStopped: false, - ttsStopped: false, - }); + const [botOutputSupported, setBotOutputSupported] = useState(false); // Track message stream state for BotOutput events const botOutputStreamStateRef = useRef<{ @@ -91,15 +67,7 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { // Reset state on connection useRTVIClientEvent(RTVIEvent.Connected, () => { - setBotOutputSupported(null); - cachedEventsRef.current = { - llmText: "", - ttsText: "", - llmStarted: false, - ttsStarted: false, - llmStopped: false, - ttsStopped: false, - }; + setBotOutputSupported(false); botOutputStreamStateRef.current = { llmStarted: false, ttsStarted: false, @@ -109,134 +77,61 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { // Check BotOutput support from BotReady event useRTVIClientEvent(RTVIEvent.BotReady, (botData: BotReadyData) => { - // Type guard to check if about has the expected structure - const about = - botData.about && - typeof botData.about === "object" && - "library" in botData.about && - "library_version" in botData.about - ? (botData.about as { library: string; library_version: string }) - : undefined; + const rtviVersion = botData.version; + const supportsBotOutput = isMinVersion(rtviVersion, [1, 1, 0]); + setBotOutputSupported(supportsBotOutput); - // Check if library information is available - if (about?.library && about?.library_version) { - // BotOutput is supported in pipecat 0.0.98+ - const supportsBotOutput = isMinVersion(about.library_version, [0, 0, 98]); - setBotOutputSupported(supportsBotOutput); - - // If we determined support and have cached data, apply it - if (supportsBotOutput) { - applyCachedData(); - // Reset stream state after applying cached data - botOutputStreamStateRef.current = { - llmStarted: false, - ttsStarted: false, - lastChunkText: { llm: "", tts: "" }, - }; - } else if (!supportsBotOutput) { - // If BotOutput is not supported, apply cached data as legacy events - applyCachedData(); - } - } else { - // No library information available - assume BotOutput is not supported - setBotOutputSupported(false); - // Apply any cached data as legacy events - applyCachedData(); - } - }); - - // Apply cached data helper - const applyCachedData = useCallback(() => { - const cached = cachedEventsRef.current; - - // Apply cached started events - if (cached.llmStarted) { - callbacks.onBotMessageStarted?.("llm"); - } - if (cached.ttsStarted) { - callbacks.onBotMessageStarted?.("tts"); - } - - // Apply cached text chunks - if (cached.llmText) { - callbacks.onBotMessageChunk?.("llm", cached.llmText); - } - if (cached.ttsText) { - callbacks.onBotMessageChunk?.("tts", cached.ttsText); - } - - // Apply cached ended events - if (cached.llmStopped) { - callbacks.onBotMessageEnded?.("llm"); - } - if (cached.ttsStopped) { - callbacks.onBotMessageEnded?.("tts"); - } - - // Clear cache after applying - cachedEventsRef.current = { - llmText: "", - ttsText: "", + // Reset stream state when BotReady is received + botOutputStreamStateRef.current = { llmStarted: false, ttsStarted: false, - llmStopped: false, - ttsStopped: false, + lastChunkText: { llm: "", tts: "" }, }; - }, [callbacks]); + }); // BotOutput handler - maps to unified callbacks useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => { - // Only process BotOutput if it's supported - if (botOutputSupported === true) { - // Derive message type from BotOutput data - const type: "llm" | "tts" = data.spoken ? "tts" : "llm"; - - // Check if this is the first BotOutput for this type in the current turn - const streamState = botOutputStreamStateRef.current; - const isFirstForType = - (type === "llm" && !streamState.llmStarted) || - (type === "tts" && !streamState.ttsStarted); - - if (isFirstForType) { - callbacks.onBotMessageStarted?.(type); - if (type === "llm") { - streamState.llmStarted = true; - } else { - streamState.ttsStarted = true; - } + // Derive message type from BotOutput data + const type: "llm" | "tts" = data.spoken ? "tts" : "llm"; + + // Check if this is the first BotOutput for this type in the current turn + const streamState = botOutputStreamStateRef.current; + const isFirstForType = + (type === "llm" && !streamState.llmStarted) || + (type === "tts" && !streamState.ttsStarted); + + if (isFirstForType) { + callbacks.onBotMessageStarted?.(type); + if (type === "llm") { + streamState.llmStarted = true; + } else { + streamState.ttsStarted = true; } + } - // Process the text chunk with proper spacing for BotOutput - if (data.text) { - const lastChunk = botOutputStreamStateRef.current.lastChunkText[type]; - let textToSend = data.text; - - // Add space separator if needed between BotOutput chunks - if ( - lastChunk && - !lastChunk.endsWith(" ") && - !textToSend.startsWith(" ") - ) { - textToSend = " " + textToSend; - } - - // Include metadata for BotOutput events - const metadata: BotMessageChunkMetadata = { - aggregated_by: data.aggregated_by, - }; + // Process the text chunk with proper spacing for BotOutput + if (data.text) { + const lastChunk = botOutputStreamStateRef.current.lastChunkText[type]; + let textToSend = data.text; - callbacks.onBotMessageChunk?.(type, textToSend, metadata); - botOutputStreamStateRef.current.lastChunkText[type] = textToSend; + // Add space separator if needed between BotOutput chunks + if (lastChunk) { + textToSend = " " + textToSend; } - // If this is a sentence-level output, it might indicate completion - // However, we'll rely on BotStoppedSpeaking for definitive end detection + // Include metadata for BotOutput events + const metadata: BotMessageChunkMetadata = { + aggregated_by: data.aggregated_by, + }; + + callbacks.onBotMessageChunk?.(type, textToSend, metadata); + botOutputStreamStateRef.current.lastChunkText[type] = textToSend; } }); // Handle BotStoppedSpeaking to signal end of message streams useRTVIClientEvent(RTVIEvent.BotStoppedSpeaking, () => { - if (botOutputSupported === true) { + if (botOutputSupported) { // For BotOutput, signal end for any active streams const streamState = botOutputStreamStateRef.current; if (streamState.llmStarted) { @@ -256,66 +151,33 @@ export function useBotMessages(callbacks: UseBotMessagesCallbacks) { // Handle legacy BotLlmStarted events useRTVIClientEvent(RTVIEvent.BotLlmStarted, () => { - // Handle the event based on support status - if (botOutputSupported === false) { - callbacks.onBotMessageStarted?.("llm"); - } else if (botOutputSupported === null) { - // Cache during period before BotReady is received - cachedEventsRef.current.llmStarted = true; - } + if (botOutputSupported) return; + callbacks.onBotMessageStarted?.("llm"); }); useRTVIClientEvent(RTVIEvent.BotLlmText, (data) => { - if (botOutputSupported === false) { - callbacks.onBotMessageChunk?.("llm", data.text); - } else if (botOutputSupported === null) { - // Cache during period before BotReady is received with proper spacing - const cached = cachedEventsRef.current.llmText; - cachedEventsRef.current.llmText += - (cached && !cached.endsWith(" ") && !data.text.startsWith(" ") - ? " " - : "") + data.text; - } + if (botOutputSupported) return; + callbacks.onBotMessageChunk?.("llm", data.text); }); useRTVIClientEvent(RTVIEvent.BotLlmStopped, () => { - if (botOutputSupported === false) { - callbacks.onBotMessageEnded?.("llm"); - } else if (botOutputSupported === null) { - // Cache during period before BotReady is received - cachedEventsRef.current.llmStopped = true; - } + if (botOutputSupported) return; + callbacks.onBotMessageEnded?.("llm"); }); useRTVIClientEvent(RTVIEvent.BotTtsStarted, () => { - if (botOutputSupported === false) { - callbacks.onBotMessageStarted?.("tts"); - } else if (botOutputSupported === null) { - // Cache during period before BotReady is received - cachedEventsRef.current.ttsStarted = true; - } + if (botOutputSupported) return; + callbacks.onBotMessageStarted?.("tts"); }); useRTVIClientEvent(RTVIEvent.BotTtsText, (data) => { - if (botOutputSupported === false) { - callbacks.onBotMessageChunk?.("tts", data.text); - } else if (botOutputSupported === null) { - // Cache during period before BotReady is received with proper spacing - const cached = cachedEventsRef.current.ttsText; - cachedEventsRef.current.ttsText += - (cached && !cached.endsWith(" ") && !data.text.startsWith(" ") - ? " " - : "") + data.text; - } + if (botOutputSupported) return; + callbacks.onBotMessageChunk?.("tts", data.text); }); useRTVIClientEvent(RTVIEvent.BotTtsStopped, () => { - if (botOutputSupported === false) { - callbacks.onBotMessageEnded?.("tts"); - } else if (botOutputSupported === null) { - // Cache during period before BotReady is received - cachedEventsRef.current.ttsStopped = true; - } + if (botOutputSupported) return; + callbacks.onBotMessageEnded?.("tts"); }); return { From 0a9c52dc55dfbe85d8c8eb78307ad38a3af27dd4 Mon Sep 17 00:00:00 2001 From: Christian Stuff Date: Fri, 19 Dec 2025 10:41:08 +0100 Subject: [PATCH 4/5] refactor botOutput handling and detection --- .../src/components/ConversationProvider.tsx | 98 +++++++++--- .../components/elements/MessageContent.tsx | 65 +++++++- .../components/elements/TranscriptOverlay.tsx | 78 ++++++---- .../components/panels/ConversationPanel.tsx | 10 +- package/src/hooks/index.ts | 5 +- package/src/hooks/useBotMessages.ts | 141 ++---------------- package/src/hooks/usePipecatConversation.ts | 34 ++++- package/src/stores/conversationStore.ts | 127 +++++++++++++++- package/src/types/conversation.ts | 16 +- 9 files changed, 376 insertions(+), 198 deletions(-) diff --git a/package/src/components/ConversationProvider.tsx b/package/src/components/ConversationProvider.tsx index 710f62b..1290e07 100644 --- a/package/src/components/ConversationProvider.tsx +++ b/package/src/components/ConversationProvider.tsx @@ -4,9 +4,10 @@ import { type ConversationMessagePart, } from "@/types/conversation"; import { useBotMessages } from "@/hooks/useBotMessages"; -import { RTVIEvent } from "@pipecat-ai/client-js"; +import { BotOutputData, BotReadyData, RTVIEvent } from "@pipecat-ai/client-js"; import { useRTVIClientEvent } from "@pipecat-ai/client-react"; -import { createContext, useContext, useRef } from "react"; +import { createContext, useContext, useRef, useState } from "react"; +import { isMinVersion } from "@/utils/version"; interface ConversationContextValue { messages: ConversationMessage[]; @@ -14,6 +15,11 @@ interface ConversationContextValue { role: "user" | "assistant" | "system"; parts: ConversationMessagePart[]; }) => void; + /** + * Whether BotOutput events are supported (RTVI 1.1.0+) + * null = unknown (before BotReady), true = supported, false = not supported + */ + botOutputSupported: boolean | null; } const ConversationContext = createContext( @@ -30,13 +36,20 @@ export const ConversationProvider = ({ children }: React.PropsWithChildren) => { injectMessage, upsertUserTranscript, updateAssistantText, + updateAssistantBotOutput, } = useConversationStore(); + // null = unknown (before BotReady), true = supported, false = not supported + const [botOutputSupported, setBotOutputSupported] = useState( + null, + ); const userStoppedTimeout = useRef>(undefined); const assistantStreamResetRef = useRef(0); useRTVIClientEvent(RTVIEvent.Connected, () => { clearMessages(); + setBotOutputSupported(null); + botOutputLastChunkRef.current = { spoken: "", unspoken: "" }; }); // Helper to ensure assistant message exists @@ -62,28 +75,68 @@ export const ConversationProvider = ({ children }: React.PropsWithChildren) => { return false; }; - // Use the bot messages hook to handle BotOutput detection and fallback - useBotMessages({ - onBotMessageStarted: () => { - ensureAssistantMessage(); - }, - onBotMessageChunk: (type, text) => { - // The hook handles spacing for BotOutput chunks internally - // For legacy events, spacing is handled by the store for TTS - updateAssistantText(text, false, type); - }, - onBotMessageEnded: () => { - const store = useConversationStore.getState(); - const lastAssistant = store.messages.findLast( - (m: ConversationMessage) => m.role === "assistant", - ); - - if (lastAssistant && !lastAssistant.final) { - finalizeLastMessage("assistant"); - } - }, + // Detect BotOutput support from BotReady event + useRTVIClientEvent(RTVIEvent.BotReady, (botData: BotReadyData) => { + const rtviVersion = botData.version; + const supportsBotOutput = isMinVersion(rtviVersion, [1, 1, 0]); + setBotOutputSupported(supportsBotOutput); + }); + + // Track last chunk text per type for spacing detection in BotOutput mode + const botOutputLastChunkRef = useRef<{ spoken: string; unspoken: string }>({ + spoken: "", + unspoken: "", + }); + + useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => { + ensureAssistantMessage(); + + // Handle spacing for BotOutput chunks + let textToAdd = data.text; + const lastChunk = data.spoken + ? botOutputLastChunkRef.current.spoken + : botOutputLastChunkRef.current.unspoken; + + // Add space separator if needed between BotOutput chunks + if (lastChunk) { + textToAdd = " " + textToAdd; + } + + // Update the appropriate last chunk tracker + if (data.spoken) { + botOutputLastChunkRef.current.spoken = textToAdd; + } else { + botOutputLastChunkRef.current.unspoken = textToAdd; + } + + // Update both spoken and unspoken text streams + const isFinal = data.aggregated_by === "sentence"; + updateAssistantBotOutput(textToAdd, isFinal, data.spoken); }); + // Handle legacy TTS/LLM events (when BotOutput not supported) + useBotMessages( + { + onBotMessageStarted: () => { + ensureAssistantMessage(); + }, + onBotMessageChunk: (type, text) => { + updateAssistantText(text, false, type); + }, + onBotMessageEnded: () => { + const store = useConversationStore.getState(); + const lastAssistant = store.messages.findLast( + (m: ConversationMessage) => m.role === "assistant", + ); + + if (lastAssistant && !lastAssistant.final) { + finalizeLastMessage("assistant"); + } + }, + }, + botOutputSupported === true, + ); + useRTVIClientEvent(RTVIEvent.BotStoppedSpeaking, () => { // Finalize the assistant message when bot stops speaking // This works for both BotOutput and fallback scenarios @@ -131,6 +184,7 @@ export const ConversationProvider = ({ children }: React.PropsWithChildren) => { const contextValue: ConversationContextValue = { messages, injectMessage, + botOutputSupported, }; return ( diff --git a/package/src/components/elements/MessageContent.tsx b/package/src/components/elements/MessageContent.tsx index 7c05b82..7dc6224 100644 --- a/package/src/components/elements/MessageContent.tsx +++ b/package/src/components/elements/MessageContent.tsx @@ -1,5 +1,6 @@ import { cn } from "@/lib/utils"; import { + BotOutputText, ConversationMessage, ConversationMessagePart, } from "@/types/conversation"; @@ -30,25 +31,77 @@ interface Props { message: ConversationMessage; } +/** + * Renders BotOutput mode: shows unspoken text muted, spoken text replaces it + */ +const renderBotOutput = (spoken: string, unspoken: string): React.ReactNode => { + const spokenLength = spoken?.length || 0; + const remainingUnspoken = unspoken ? unspoken.slice(spokenLength) : ""; + + return ( + + {spoken} + {remainingUnspoken && ( + {remainingUnspoken} + )} + + ); +}; + export const MessageContent = ({ classNames = {}, message }: Props) => { const parts = Array.isArray(message.parts) ? message.parts : []; + return (
{parts.map((part: ConversationMessagePart, idx: number) => { const nextPart = parts?.[idx + 1] ?? null; const isText = typeof part.text === "string"; - const nextIsText = nextPart && typeof nextPart.text === "string"; + const isBotOutputTextValue = Boolean( + part.text && + typeof part.text === "object" && + "spoken" in part.text && + "unspoken" in part.text, + ); + const nextIsText = + nextPart && + Boolean( + typeof nextPart.text === "string" || + (nextPart.text && + typeof nextPart.text === "object" && + "spoken" in nextPart.text), + ); + + let content: React.ReactNode; + if (isBotOutputTextValue) { + const botText = part.text as BotOutputText; + content = renderBotOutput(botText.spoken, botText.unspoken); + } else { + content = part.text as React.ReactNode; + } + return ( - {isText ? part.text : part.text} - {isText && nextIsText ? " " : null} + {content} + {(isText || isBotOutputTextValue) && nextIsText ? " " : null} ); })} {parts.length === 0 || - parts.every( - (part) => typeof part.text === "string" && part.text.trim() === "", - ) ? ( + parts.every((part) => { + if (typeof part.text === "string") { + return part.text.trim() === ""; + } + if ( + part.text && + typeof part.text === "object" && + "spoken" in part.text && + "unspoken" in part.text + ) { + const botText = part.text as unknown as BotOutputText; + return botText.spoken.trim() === "" && botText.unspoken.trim() === ""; + } + return false; + }) ? ( ) : null}
{ const [transcript, setTranscript] = useState([]); const [turnEnd, setIsTurnEnd] = useState(false); + const [botOutputSupported, setBotOutputSupported] = useState(false); const transportState = usePipecatClientTransportState(); - // Use the bot messages hook to handle BotOutput detection and fallback - useBotMessages({ - onBotMessageChunk: (type, text, metadata) => { - if (participant === "local") { - return; + // Detect BotOutput support from BotReady event + useRTVIClientEvent(RTVIEvent.BotReady, (botData: BotReadyData) => { + const rtviVersion = botData.version; + const supportsBotOutput = isMinVersion(rtviVersion, [1, 1, 0]); + setBotOutputSupported(supportsBotOutput); + }); + + // Handle BotOutput events (when supported) - only word-level spoken chunks + useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => { + if (participant === "local" || !botOutputSupported) { + return; + } + + // Only process word-level outputs that have been spoken + // These provide real-time word-by-word streaming for karaoke-like UI + if (data.aggregated_by === "word" && data.spoken === true && data.text) { + if (turnEnd) { + setTranscript([]); + setIsTurnEnd(false); } - // Only process TTS chunks (spoken content) - if (type === "tts") { - // For BotOutput events, only process word-level chunks - // For legacy events, process all chunks - if (metadata?.aggregated_by && metadata.aggregated_by !== "word") { + setTranscript((prev) => [...prev, data.text]); + } + }); + + // Handle legacy TTS events (when BotOutput not supported) + useBotMessages( + { + onBotMessageChunk: (type, text) => { + if (participant === "local") { return; } - if (turnEnd) { - setTranscript([]); - setIsTurnEnd(false); - } + // Only process TTS chunks (spoken content) + if (type === "tts") { + if (turnEnd) { + setTranscript([]); + setIsTurnEnd(false); + } - setTranscript((prev) => [...prev, text]); - } - }, - onBotMessageEnded: (type) => { - if (participant === "local") { - return; - } - // Only handle TTS ended events - if (type === "tts") { - setIsTurnEnd(true); - } + setTranscript((prev) => [...prev, text]); + } + }, + onBotMessageEnded: (type) => { + if (participant === "local") { + return; + } + // Only handle TTS ended events + if (type === "tts") { + setIsTurnEnd(true); + } + }, }, - }); + botOutputSupported, + ); useRTVIClientEvent( RTVIEvent.BotStoppedSpeaking, diff --git a/package/src/components/panels/ConversationPanel.tsx b/package/src/components/panels/ConversationPanel.tsx index 7aa435a..e48b306 100644 --- a/package/src/components/panels/ConversationPanel.tsx +++ b/package/src/components/panels/ConversationPanel.tsx @@ -5,6 +5,7 @@ import { Button } from "@/components/ui/button"; import { Panel, PanelContent, PanelHeader } from "@/components/ui/panel"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; import { TextMode } from "@/types/conversation"; +import { useConversationContext } from "@/components/ConversationProvider"; import { LineChartIcon, MessagesSquareIcon } from "lucide-react"; import { memo, useState } from "react"; @@ -53,6 +54,13 @@ export const ConversationPanel: React.FC = memo( }) => { const defaultValue = noConversation ? "metrics" : "conversation"; const [textMode, setTextMode] = useState(initialTextMode); + const { botOutputSupported } = useConversationContext(); + + // Show toggle only when BotOutput is confirmed unsupported (false) and not disabled + // Hide by default (when botOutputSupported is still unknown/null or true) + const shouldShowToggle = + !noTextModeToggle && botOutputSupported === false; + return ( @@ -71,7 +79,7 @@ export const ConversationPanel: React.FC = memo( )} - {!noTextModeToggle && ( + {shouldShowToggle && (