4 changes: 2 additions & 2 deletions package.json
@@ -9,8 +9,8 @@
     "examples/*"
   ],
   "dependencies": {
-    "@daily-co/daily-js": "^0.84.0",
-    "@pipecat-ai/client-js": "^1.4.0",
+    "@daily-co/daily-js": "^0.85.0",
+    "@pipecat-ai/client-js": "^1.5.0",
     "@pipecat-ai/client-react": "^1.1.0",
     "react": "^19.2.1",
     "react-dom": "^19.2.1",
9 changes: 5 additions & 4 deletions package/package.json
@@ -103,17 +103,18 @@
     "lucide-react": "^0.511.0",
     "react-chartjs-2": "^5.3.0",
     "react-resizable-panels": "^3.0.6",
+    "semver": "^7.6.3",
     "tailwind-merge": "^3.3.1",
     "zustand": "^5.0.8"
   },
   "devDependencies": {
-    "@daily-co/daily-js": "^0.80.0",
+    "@daily-co/daily-js": "^0.85.0",
     "@eslint/js": "^9.36.0",
     "@ladle/react": "^5.0.3",
-    "@pipecat-ai/client-js": "^1.4.0",
+    "@pipecat-ai/client-js": "^1.5.0",
     "@pipecat-ai/client-react": "^1.1.0",
-    "@pipecat-ai/daily-transport": "^1.4.0",
-    "@pipecat-ai/small-webrtc-transport": "^1.5.0",
+    "@pipecat-ai/daily-transport": "^1.5.0",
+    "@pipecat-ai/small-webrtc-transport": "^1.8.0",
     "@tailwindcss/vite": "^4.1.13",
     "@types/node": "^22.18.8",
     "@types/react": "^19.1.16",
108 changes: 85 additions & 23 deletions package/src/components/ConversationProvider.tsx
@@ -3,16 +3,23 @@ import {
   type ConversationMessage,
   type ConversationMessagePart,
 } from "@/types/conversation";
-import { RTVIEvent } from "@pipecat-ai/client-js";
+import { useBotMessages } from "@/hooks/useBotMessages";
+import { BotOutputData, BotReadyData, RTVIEvent } from "@pipecat-ai/client-js";
 import { useRTVIClientEvent } from "@pipecat-ai/client-react";
-import { createContext, useContext, useRef } from "react";
+import { createContext, useContext, useRef, useState } from "react";
+import { isMinVersion } from "@/utils/version";
 
 interface ConversationContextValue {
   messages: ConversationMessage[];
   injectMessage: (message: {
     role: "user" | "assistant" | "system";
     parts: ConversationMessagePart[];
   }) => void;
+  /**
+   * Whether BotOutput events are supported (RTVI 1.1.0+)
+   * null = unknown (before BotReady), true = supported, false = not supported
+   */
+  botOutputSupported: boolean | null;
 }
 
 const ConversationContext = createContext<ConversationContextValue | null>(
@@ -29,32 +36,24 @@ export const ConversationProvider = ({ children }: React.PropsWithChildren) => {
     injectMessage,
     upsertUserTranscript,
     updateAssistantText,
-    startAssistantLlmStream,
+    updateAssistantBotOutput,
   } = useConversationStore();
 
+  // null = unknown (before BotReady), true = supported, false = not supported
+  const [botOutputSupported, setBotOutputSupported] = useState<boolean | null>(
+    null,
+  );
   const userStoppedTimeout = useRef<ReturnType<typeof setTimeout>>(undefined);
-  const assistantStreamResetRef = useRef<number>(0);
 
   useRTVIClientEvent(RTVIEvent.Connected, () => {
     clearMessages();
+    setBotOutputSupported(null);
+    botOutputLastChunkRef.current = { spoken: "", unspoken: "" };
   });
 
-  useRTVIClientEvent(RTVIEvent.BotLlmStarted, () => {
-    startAssistantLlmStream();
-    // Nudge a reset counter so any consumer logic can infer fresh turn if needed
-    assistantStreamResetRef.current += 1;
-  });
-
-  useRTVIClientEvent(RTVIEvent.BotLlmText, (data) => {
-    updateAssistantText(data.text, false, "llm");
-  });
-
-  useRTVIClientEvent(RTVIEvent.BotLlmStopped, () => {
-    finalizeLastMessage("assistant");
-  });
-
-  useRTVIClientEvent(RTVIEvent.BotTtsStarted, () => {
-    // Start a new assistant message for TTS if there isn't one already in progress
+  // Helper to ensure assistant message exists
+  const ensureAssistantMessage = () => {
     const store = useConversationStore.getState();
     const lastAssistantIndex = store.messages.findLastIndex(
       (msg: ConversationMessage) => msg.role === "assistant",
@@ -70,15 +69,77 @@
         final: false,
         parts: [],
       });
-      assistantStreamResetRef.current += 1;
+      return true;
     }
+    return false;
+  };
 
+  // Detect BotOutput support from BotReady event
+  useRTVIClientEvent(RTVIEvent.BotReady, (botData: BotReadyData) => {
+    const rtviVersion = botData.version;
+    const supportsBotOutput = isMinVersion(rtviVersion, [1, 1, 0]);
+    setBotOutputSupported(supportsBotOutput);
   });
 
-  useRTVIClientEvent(RTVIEvent.BotTtsText, (data) => {
-    updateAssistantText(data.text, false, "tts");
+  // Track last chunk text per type for spacing detection in BotOutput mode
+  const botOutputLastChunkRef = useRef<{ spoken: string; unspoken: string }>({
+    spoken: "",
+    unspoken: "",
   });
 
-  useRTVIClientEvent(RTVIEvent.BotTtsStopped, () => {
-    // Finalize the TTS text stream
+  useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => {
+    ensureAssistantMessage();
+
+    // Handle spacing for BotOutput chunks
+    let textToAdd = data.text;
+    const lastChunk = data.spoken
+      ? botOutputLastChunkRef.current.spoken
+      : botOutputLastChunkRef.current.unspoken;
+
+    // Add space separator if needed between BotOutput chunks
+    if (lastChunk) {
+      textToAdd = " " + textToAdd;
+    }
+
+    // Update the appropriate last chunk tracker
+    if (data.spoken) {
+      botOutputLastChunkRef.current.spoken = textToAdd;
+    } else {
+      botOutputLastChunkRef.current.unspoken = textToAdd;
+    }
+
+    // Update both spoken and unspoken text streams
+    const isFinal = data.aggregated_by === "sentence";
+    updateAssistantBotOutput(textToAdd, isFinal, data.spoken);
+  });
+
+  // Handle legacy TTS/LLM events (when BotOutput not supported)
+  useBotMessages(
+    {
+      onBotMessageStarted: () => {
+        ensureAssistantMessage();
+      },
+      onBotMessageChunk: (type, text) => {
+        updateAssistantText(text, false, type);
+      },
+      onBotMessageEnded: () => {
+        const store = useConversationStore.getState();
+        const lastAssistant = store.messages.findLast(
+          (m: ConversationMessage) => m.role === "assistant",
+        );
+
+        if (lastAssistant && !lastAssistant.final) {
+          finalizeLastMessage("assistant");
+        }
+      },
+    },
+    botOutputSupported === true,
+  );
 
+  useRTVIClientEvent(RTVIEvent.BotStoppedSpeaking, () => {
+    // Finalize the assistant message when bot stops speaking
+    // This works for both BotOutput and fallback scenarios
     const store = useConversationStore.getState();
     const lastAssistant = store.messages.findLast(
       (m: ConversationMessage) => m.role === "assistant",
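To make the spacing logic in the hunk above concrete, here is a hypothetical sequence of BotOutput payloads and what the handler would pass to updateAssistantBotOutput. The field names match the ones used in the diff; the full event type ships with @pipecat-ai/client-js, and the example values are invented for illustration.

// Trackers start as { spoken: "", unspoken: "" } and reset on Connected.
//
//   { text: "Hello", spoken: false, aggregated_by: "word" }
//     -> unspoken tracker is "",      textToAdd = "Hello",  isFinal = false
//   { text: "there", spoken: false, aggregated_by: "word" }
//     -> unspoken tracker is "Hello", textToAdd = " there", isFinal = false
//   { text: "Hello", spoken: true,  aggregated_by: "word" }
//     -> spoken tracker is "",        textToAdd = "Hello",  isFinal = false
//   { text: "there", spoken: true,  aggregated_by: "word" }
//     -> spoken tracker is "Hello",   textToAdd = " there", isFinal = false
//
// A sentence-level payload ({ aggregated_by: "sentence" }) flips isFinal to
// true, letting the store mark the aggregated text for that turn as complete.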
@@ -123,6 +184,7 @@
   const contextValue: ConversationContextValue = {
     messages,
     injectMessage,
+    botOutputSupported,
   };
 
   return (
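Both this provider and TranscriptOverlay below gate BotOutput handling on the server's RTVI protocol version via isMinVersion from "@/utils/version", which this diff doesn't include. A minimal sketch of what such a helper might look like, assuming a dotted numeric version string; the real implementation may instead lean on the semver package added to package/package.json above.

// Hypothetical sketch of "@/utils/version", not the actual implementation.
// Returns true when `version` (e.g. "1.1.0") is at least the given
// [major, minor, patch] minimum; pre-release/build suffixes are ignored.
export function isMinVersion(
  version: string | undefined,
  min: [number, number, number],
): boolean {
  if (!version) return false;
  const parts = version
    .split(/[-+]/)[0] // "1.1.0-rc.1" -> "1.1.0"
    .split(".")
    .map((n) => Number.parseInt(n, 10) || 0);
  for (let i = 0; i < min.length; i++) {
    const actual = parts[i] ?? 0;
    if (actual !== min[i]) return actual > min[i];
  }
  return true; // exactly the minimum
}

// Usage, as in the BotReady handlers in this PR:
// isMinVersion("1.2.0", [1, 1, 0]) === true
// isMinVersion("1.0.9", [1, 1, 0]) === false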
65 changes: 59 additions & 6 deletions package/src/components/elements/MessageContent.tsx
@@ -1,5 +1,6 @@
 import { cn } from "@/lib/utils";
 import {
+  BotOutputText,
   ConversationMessage,
   ConversationMessagePart,
 } from "@/types/conversation";
@@ -30,25 +31,77 @@ interface Props {
   message: ConversationMessage;
 }
 
+/**
+ * Renders BotOutput mode: shows unspoken text muted, spoken text replaces it
+ */
+const renderBotOutput = (spoken: string, unspoken: string): React.ReactNode => {
+  const spokenLength = spoken?.length || 0;
+  const remainingUnspoken = unspoken ? unspoken.slice(spokenLength) : "";
+
+  return (
+    <span>
+      {spoken}
+      {remainingUnspoken && (
+        <span className="text-muted-foreground">{remainingUnspoken}</span>
+      )}
+    </span>
+  );
+};
+
 export const MessageContent = ({ classNames = {}, message }: Props) => {
   const parts = Array.isArray(message.parts) ? message.parts : [];
 
   return (
     <div className={cn("flex flex-col gap-2", classNames.messageContent)}>
       {parts.map((part: ConversationMessagePart, idx: number) => {
         const nextPart = parts?.[idx + 1] ?? null;
         const isText = typeof part.text === "string";
-        const nextIsText = nextPart && typeof nextPart.text === "string";
+        const isBotOutputTextValue = Boolean(
+          part.text &&
+            typeof part.text === "object" &&
+            "spoken" in part.text &&
+            "unspoken" in part.text,
+        );
+        const nextIsText =
+          nextPart &&
+          Boolean(
+            typeof nextPart.text === "string" ||
+              (nextPart.text &&
+                typeof nextPart.text === "object" &&
+                "spoken" in nextPart.text),
+          );
+
+        let content: React.ReactNode;
+        if (isBotOutputTextValue) {
+          const botText = part.text as BotOutputText;
+          content = renderBotOutput(botText.spoken, botText.unspoken);
+        } else {
+          content = part.text as React.ReactNode;
+        }
+
         return (
           <Fragment key={idx}>
-            {isText ? part.text : part.text}
-            {isText && nextIsText ? " " : null}
+            {content}
+            {(isText || isBotOutputTextValue) && nextIsText ? " " : null}
           </Fragment>
         );
       })}
       {parts.length === 0 ||
-      parts.every(
-        (part) => typeof part.text === "string" && part.text.trim() === "",
-      ) ? (
+      parts.every((part) => {
+        if (typeof part.text === "string") {
+          return part.text.trim() === "";
+        }
+        if (
+          part.text &&
+          typeof part.text === "object" &&
+          "spoken" in part.text &&
+          "unspoken" in part.text
+        ) {
+          const botText = part.text as unknown as BotOutputText;
+          return botText.spoken.trim() === "" && botText.unspoken.trim() === "";
+        }
+        return false;
+      }) ? (
        <Thinking className={classNames.thinking} />
       ) : null}
       <div
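A concrete reading of renderBotOutput's slicing, under the assumption (implied by the slice call) that the store accumulates the full turn text in unspoken and the already-voiced prefix in spoken. BotOutputText itself is defined in "@/types/conversation" and isn't part of this diff; the example values are invented.

// Assumed shape, inferred from the fields the component reads:
//   interface BotOutputText { spoken: string; unspoken: string }
//
// Example state mid-sentence:
const botText = {
  spoken: "Hello there",
  unspoken: "Hello there, how are you today?",
};
// spoken.length === 11, so unspoken.slice(11) === ", how are you today?"
// renderBotOutput renders "Hello there" normally, followed by
// ", how are you today?" in the muted text-muted-foreground span, so spoken
// words visually replace the muted preview as TTS catches up.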
74 changes: 52 additions & 22 deletions package/src/components/elements/TranscriptOverlay.tsx
@@ -1,11 +1,13 @@
 "use client";
 
 import { cn } from "@/lib/utils";
-import { type BotTTSTextData, RTVIEvent } from "@pipecat-ai/client-js";
+import { BotOutputData, BotReadyData, RTVIEvent } from "@pipecat-ai/client-js";
 import {
   usePipecatClientTransportState,
   useRTVIClientEvent,
 } from "@pipecat-ai/client-react";
+import { useBotMessages } from "@/hooks/useBotMessages";
+import { isMinVersion } from "@/utils/version";
 import { cva } from "class-variance-authority";
 import { useCallback, useState } from "react";

@@ -181,25 +183,63 @@ export const TranscriptOverlay = ({
 }: TranscriptOverlayProps) => {
   const [transcript, setTranscript] = useState<string[]>([]);
   const [turnEnd, setIsTurnEnd] = useState(false);
+  const [botOutputSupported, setBotOutputSupported] = useState(false);
   const transportState = usePipecatClientTransportState();
 
-  useRTVIClientEvent(
-    RTVIEvent.BotTtsText,
-    useCallback(
-      (event: BotTTSTextData) => {
+  // Detect BotOutput support from BotReady event
+  useRTVIClientEvent(RTVIEvent.BotReady, (botData: BotReadyData) => {
+    const rtviVersion = botData.version;
+    const supportsBotOutput = isMinVersion(rtviVersion, [1, 1, 0]);
+    setBotOutputSupported(supportsBotOutput);
+  });
+
+  // Handle BotOutput events (when supported) - only word-level spoken chunks
+  useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => {
+    if (participant === "local" || !botOutputSupported) {
+      return;
+    }
+
+    // Only process word-level outputs that have been spoken
+    // These provide real-time word-by-word streaming for karaoke-like UI
+    if (data.aggregated_by === "word" && data.spoken === true && data.text) {
+      if (turnEnd) {
+        setTranscript([]);
+        setIsTurnEnd(false);
+      }
+
+      setTranscript((prev) => [...prev, data.text]);
+    }
+  });
+
+  // Handle legacy TTS events (when BotOutput not supported)
+  useBotMessages(
+    {
+      onBotMessageChunk: (type, text) => {
         if (participant === "local") {
           return;
         }
 
-        if (turnEnd) {
-          setTranscript([]);
-          setIsTurnEnd(false);
-        }
+        // Only process TTS chunks (spoken content)
+        if (type === "tts") {
+          if (turnEnd) {
+            setTranscript([]);
+            setIsTurnEnd(false);
+          }
 
-        setTranscript((prev) => [...prev, event.text]);
+          setTranscript((prev) => [...prev, text]);
+        }
       },
-      [turnEnd, participant],
-    ),
+      onBotMessageEnded: (type) => {
+        if (participant === "local") {
+          return;
+        }
+        // Only handle TTS ended events
+        if (type === "tts") {
+          setIsTurnEnd(true);
+        }
+      },
+    },
+    botOutputSupported,
   );
 
   useRTVIClientEvent(
@@ -212,16 +252,6 @@
     }, [participant]),
   );
 
-  useRTVIClientEvent(
-    RTVIEvent.BotTtsStopped,
-    useCallback(() => {
-      if (participant === "local") {
-        return;
-      }
-      setIsTurnEnd(true);
-    }, [participant]),
-  );
 
   if (transcript.length === 0 || transportState !== "ready") {
     return null;
   }
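Both TranscriptOverlay and ConversationProvider fall back to useBotMessages when BotOutput isn't available. The hook lives in "@/hooks/useBotMessages" and isn't part of this diff; judging from its two call sites, a sketch might look like the following. The callback names come from the diff, but the internals and the meaning of the second argument are inferred, not confirmed.

// Hypothetical sketch of "@/hooks/useBotMessages": bridge the legacy
// BotLlm*/BotTts* events into started/chunk/ended callbacks, and go inert
// once the caller reports that BotOutput is supported.
import { RTVIEvent } from "@pipecat-ai/client-js";
import { useRTVIClientEvent } from "@pipecat-ai/client-react";

type BotMessageType = "llm" | "tts";

interface BotMessageCallbacks {
  onBotMessageStarted?: (type: BotMessageType) => void;
  onBotMessageChunk?: (type: BotMessageType, text: string) => void;
  onBotMessageEnded?: (type: BotMessageType) => void;
}

export function useBotMessages(
  { onBotMessageStarted, onBotMessageChunk, onBotMessageEnded }: BotMessageCallbacks,
  botOutputSupported: boolean,
) {
  useRTVIClientEvent(RTVIEvent.BotLlmStarted, () => {
    if (!botOutputSupported) onBotMessageStarted?.("llm");
  });
  useRTVIClientEvent(RTVIEvent.BotLlmText, (data) => {
    if (!botOutputSupported) onBotMessageChunk?.("llm", data.text);
  });
  useRTVIClientEvent(RTVIEvent.BotLlmStopped, () => {
    if (!botOutputSupported) onBotMessageEnded?.("llm");
  });
  useRTVIClientEvent(RTVIEvent.BotTtsStarted, () => {
    if (!botOutputSupported) onBotMessageStarted?.("tts");
  });
  useRTVIClientEvent(RTVIEvent.BotTtsText, (data) => {
    if (!botOutputSupported) onBotMessageChunk?.("tts", data.text);
  });
  useRTVIClientEvent(RTVIEvent.BotTtsStopped, () => {
    if (!botOutputSupported) onBotMessageEnded?.("tts");
  });
}

This shape would explain both call sites: ConversationProvider passes every chunk type through to the store, while TranscriptOverlay filters for type === "tts" so only spoken text reaches the overlay.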