4 changes: 2 additions & 2 deletions package.json
@@ -9,8 +9,8 @@
     "examples/*"
   ],
   "dependencies": {
-    "@daily-co/daily-js": "^0.84.0",
-    "@pipecat-ai/client-js": "^1.4.0",
+    "@daily-co/daily-js": "^0.85.0",
+    "@pipecat-ai/client-js": "^1.5.0",
     "@pipecat-ai/client-react": "^1.1.0",
     "react": "^19.2.1",
     "react-dom": "^19.2.1",
9 changes: 5 additions & 4 deletions package/package.json
@@ -103,17 +103,18 @@
     "lucide-react": "^0.511.0",
     "react-chartjs-2": "^5.3.0",
     "react-resizable-panels": "^3.0.6",
+    "semver": "^7.6.3",
     "tailwind-merge": "^3.3.1",
     "zustand": "^5.0.8"
   },
   "devDependencies": {
-    "@daily-co/daily-js": "^0.80.0",
+    "@daily-co/daily-js": "^0.85.0",
     "@eslint/js": "^9.36.0",
     "@ladle/react": "^5.0.3",
-    "@pipecat-ai/client-js": "^1.4.0",
+    "@pipecat-ai/client-js": "^1.5.0",
     "@pipecat-ai/client-react": "^1.1.0",
-    "@pipecat-ai/daily-transport": "^1.4.0",
-    "@pipecat-ai/small-webrtc-transport": "^1.5.0",
+    "@pipecat-ai/daily-transport": "^1.5.0",
+    "@pipecat-ai/small-webrtc-transport": "^1.8.0",
     "@tailwindcss/vite": "^4.1.13",
     "@types/node": "^22.18.8",
     "@types/react": "^19.1.16",
108 changes: 85 additions & 23 deletions package/src/components/ConversationProvider.tsx
@@ -3,16 +3,23 @@ import {
   type ConversationMessage,
   type ConversationMessagePart,
 } from "@/types/conversation";
-import { RTVIEvent } from "@pipecat-ai/client-js";
+import { useBotMessages } from "@/hooks/useBotMessages";
+import { BotOutputData, BotReadyData, RTVIEvent } from "@pipecat-ai/client-js";
 import { useRTVIClientEvent } from "@pipecat-ai/client-react";
-import { createContext, useContext, useRef } from "react";
+import { createContext, useContext, useRef, useState } from "react";
+import { isMinVersion } from "@/utils/version";
 
 interface ConversationContextValue {
   messages: ConversationMessage[];
   injectMessage: (message: {
     role: "user" | "assistant" | "system";
     parts: ConversationMessagePart[];
   }) => void;
+  /**
+   * Whether BotOutput events are supported (RTVI 1.1.0+)
+   * null = unknown (before BotReady), true = supported, false = not supported
+   */
+  botOutputSupported: boolean | null;
 }
 
 const ConversationContext = createContext<ConversationContextValue | null>(
@@ -29,32 +36,24 @@ export const ConversationProvider = ({ children }: React.PropsWithChildren) => {
     injectMessage,
     upsertUserTranscript,
     updateAssistantText,
-    startAssistantLlmStream,
+    updateAssistantBotOutput,
   } = useConversationStore();
 
+  // null = unknown (before BotReady), true = supported, false = not supported
+  const [botOutputSupported, setBotOutputSupported] = useState<boolean | null>(
+    null,
+  );
   const userStoppedTimeout = useRef<ReturnType<typeof setTimeout>>(undefined);
-  const assistantStreamResetRef = useRef<number>(0);
 
   useRTVIClientEvent(RTVIEvent.Connected, () => {
     clearMessages();
+    setBotOutputSupported(null);
+    botOutputLastChunkRef.current = { spoken: "", unspoken: "" };
   });
 
-  useRTVIClientEvent(RTVIEvent.BotLlmStarted, () => {
-    startAssistantLlmStream();
-    // Nudge a reset counter so any consumer logic can infer fresh turn if needed
-    assistantStreamResetRef.current += 1;
-  });
-
-  useRTVIClientEvent(RTVIEvent.BotLlmText, (data) => {
-    updateAssistantText(data.text, false, "llm");
-  });
-
-  useRTVIClientEvent(RTVIEvent.BotLlmStopped, () => {
-    finalizeLastMessage("assistant");
-  });
-
-  useRTVIClientEvent(RTVIEvent.BotTtsStarted, () => {
-    // Start a new assistant message for TTS if there isn't one already in progress
+  // Helper to ensure assistant message exists
+  const ensureAssistantMessage = () => {
     const store = useConversationStore.getState();
     const lastAssistantIndex = store.messages.findLastIndex(
       (msg: ConversationMessage) => msg.role === "assistant",
@@ -70,15 +69,77 @@
         final: false,
         parts: [],
       });
-      assistantStreamResetRef.current += 1;
+      return true;
     }
+    return false;
+  };
 
+  // Detect BotOutput support from BotReady event
+  useRTVIClientEvent(RTVIEvent.BotReady, (botData: BotReadyData) => {
+    const rtviVersion = botData.version;
+    const supportsBotOutput = isMinVersion(rtviVersion, [1, 1, 0]);
+    setBotOutputSupported(supportsBotOutput);
   });
 
-  useRTVIClientEvent(RTVIEvent.BotTtsText, (data) => {
-    updateAssistantText(data.text, false, "tts");
+  // Track last chunk text per type for spacing detection in BotOutput mode
+  const botOutputLastChunkRef = useRef<{ spoken: string; unspoken: string }>({
+    spoken: "",
+    unspoken: "",
   });
 
-  useRTVIClientEvent(RTVIEvent.BotTtsStopped, () => {
-    // Finalize the TTS text stream
+  useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => {
+    ensureAssistantMessage();
+
+    // Handle spacing for BotOutput chunks
+    let textToAdd = data.text;
+    const lastChunk = data.spoken
+      ? botOutputLastChunkRef.current.spoken
+      : botOutputLastChunkRef.current.unspoken;
+
+    // Add space separator if needed between BotOutput chunks
+    if (lastChunk) {
+      textToAdd = " " + textToAdd;
+    }
+
+    // Update the appropriate last chunk tracker
+    if (data.spoken) {
+      botOutputLastChunkRef.current.spoken = textToAdd;
+    } else {
+      botOutputLastChunkRef.current.unspoken = textToAdd;
+    }
+
+    // Update both spoken and unspoken text streams
+    const isFinal = data.aggregated_by === "sentence";
+    updateAssistantBotOutput(textToAdd, isFinal, data.spoken);
+  });
+
+  // Handle legacy TTS/LLM events (when BotOutput not supported)
+  useBotMessages(
+    {
+      onBotMessageStarted: () => {
+        ensureAssistantMessage();
+      },
+      onBotMessageChunk: (type, text) => {
+        updateAssistantText(text, false, type);
+      },
+      onBotMessageEnded: () => {
+        const store = useConversationStore.getState();
+        const lastAssistant = store.messages.findLast(
+          (m: ConversationMessage) => m.role === "assistant",
+        );
+
+        if (lastAssistant && !lastAssistant.final) {
+          finalizeLastMessage("assistant");
+        }
+      },
+    },
+    botOutputSupported === true,
+  );
 
+  useRTVIClientEvent(RTVIEvent.BotStoppedSpeaking, () => {
+    // Finalize the assistant message when bot stops speaking
+    // This works for both BotOutput and fallback scenarios
     const store = useConversationStore.getState();
     const lastAssistant = store.messages.findLast(
       (m: ConversationMessage) => m.role === "assistant",
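To make the spacing logic in the hunk above concrete, here is a hypothetical sequence of BotOutput payloads and what the handler would pass to updateAssistantBotOutput. The field names match the ones used in the diff; the full event type ships with @pipecat-ai/client-js, and the example values are invented for illustration.

// Trackers start as { spoken: "", unspoken: "" } and reset on Connected.
//
//   { text: "Hello", spoken: false, aggregated_by: "word" }
//     -> unspoken tracker is "",      textToAdd = "Hello",  isFinal = false
//   { text: "there", spoken: false, aggregated_by: "word" }
//     -> unspoken tracker is "Hello", textToAdd = " there", isFinal = false
//   { text: "Hello", spoken: true,  aggregated_by: "word" }
//     -> spoken tracker is "",        textToAdd = "Hello",  isFinal = false
//   { text: "there", spoken: true,  aggregated_by: "word" }
//     -> spoken tracker is "Hello",   textToAdd = " there", isFinal = false
//
// A sentence-level payload ({ aggregated_by: "sentence" }) flips isFinal to
// true, letting the store mark the aggregated text for that turn as complete.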
@@ -123,6 +184,7 @@
   const contextValue: ConversationContextValue = {
     messages,
     injectMessage,
+    botOutputSupported,
   };
 
   return (
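Both this provider and TranscriptOverlay below gate BotOutput handling on the server's RTVI protocol version via isMinVersion from "@/utils/version", which this diff doesn't include. A minimal sketch of what such a helper might look like, assuming a dotted numeric version string; the real implementation may instead lean on the semver package added to package/package.json above.

// Hypothetical sketch of "@/utils/version", not the actual implementation.
// Returns true when `version` (e.g. "1.1.0") is at least the given
// [major, minor, patch] minimum; pre-release/build suffixes are ignored.
export function isMinVersion(
  version: string | undefined,
  min: [number, number, number],
): boolean {
  if (!version) return false;
  const parts = version
    .split(/[-+]/)[0] // "1.1.0-rc.1" -> "1.1.0"
    .split(".")
    .map((n) => Number.parseInt(n, 10) || 0);
  for (let i = 0; i < min.length; i++) {
    const actual = parts[i] ?? 0;
    if (actual !== min[i]) return actual > min[i];
  }
  return true; // exactly the minimum
}

// Usage, as in the BotReady handlers in this PR:
// isMinVersion("1.2.0", [1, 1, 0]) === true
// isMinVersion("1.0.9", [1, 1, 0]) === false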
65 changes: 59 additions & 6 deletions package/src/components/elements/MessageContent.tsx
@@ -1,5 +1,6 @@
 import { cn } from "@/lib/utils";
 import {
+  BotOutputText,
   ConversationMessage,
   ConversationMessagePart,
 } from "@/types/conversation";
@@ -30,25 +31,77 @@ interface Props {
   message: ConversationMessage;
 }
 
+/**
+ * Renders BotOutput mode: shows unspoken text muted, spoken text replaces it
+ */
+const renderBotOutput = (spoken: string, unspoken: string): React.ReactNode => {
+  const spokenLength = spoken?.length || 0;
+  const remainingUnspoken = unspoken ? unspoken.slice(spokenLength) : "";
+
+  return (
+    <span>
+      {spoken}
+      {remainingUnspoken && (
+        <span className="text-muted-foreground">{remainingUnspoken}</span>
+      )}
+    </span>
+  );
+};
+
 export const MessageContent = ({ classNames = {}, message }: Props) => {
   const parts = Array.isArray(message.parts) ? message.parts : [];
 
   return (
     <div className={cn("flex flex-col gap-2", classNames.messageContent)}>
       {parts.map((part: ConversationMessagePart, idx: number) => {
         const nextPart = parts?.[idx + 1] ?? null;
         const isText = typeof part.text === "string";
-        const nextIsText = nextPart && typeof nextPart.text === "string";
+        const isBotOutputTextValue = Boolean(
+          part.text &&
+            typeof part.text === "object" &&
+            "spoken" in part.text &&
+            "unspoken" in part.text,
+        );
+        const nextIsText =
+          nextPart &&
+          Boolean(
+            typeof nextPart.text === "string" ||
+              (nextPart.text &&
+                typeof nextPart.text === "object" &&
+                "spoken" in nextPart.text),
+          );
+
+        let content: React.ReactNode;
+        if (isBotOutputTextValue) {
+          const botText = part.text as BotOutputText;
+          content = renderBotOutput(botText.spoken, botText.unspoken);
+        } else {
+          content = part.text as React.ReactNode;
+        }
+
         return (
           <Fragment key={idx}>
-            {isText ? part.text : part.text}
-            {isText && nextIsText ? " " : null}
+            {content}
+            {(isText || isBotOutputTextValue) && nextIsText ? " " : null}
           </Fragment>
         );
       })}
       {parts.length === 0 ||
-      parts.every(
-        (part) => typeof part.text === "string" && part.text.trim() === "",
-      ) ? (
+      parts.every((part) => {
+        if (typeof part.text === "string") {
+          return part.text.trim() === "";
+        }
+        if (
+          part.text &&
+          typeof part.text === "object" &&
+          "spoken" in part.text &&
+          "unspoken" in part.text
+        ) {
+          const botText = part.text as unknown as BotOutputText;
+          return botText.spoken.trim() === "" && botText.unspoken.trim() === "";
+        }
+        return false;
+      }) ? (
        <Thinking className={classNames.thinking} />
       ) : null}
       <div
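A concrete reading of renderBotOutput's slicing, under the assumption (implied by the slice call) that the store accumulates the full turn text in unspoken and the already-voiced prefix in spoken. BotOutputText itself is defined in "@/types/conversation" and isn't part of this diff; the example values are invented.

// Assumed shape, inferred from the fields the component reads:
//   interface BotOutputText { spoken: string; unspoken: string }
//
// Example state mid-sentence:
const botText = {
  spoken: "Hello there",
  unspoken: "Hello there, how are you today?",
};
// spoken.length === 11, so unspoken.slice(11) === ", how are you today?"
// renderBotOutput renders "Hello there" normally, followed by
// ", how are you today?" in the muted text-muted-foreground span, so spoken
// words visually replace the muted preview as TTS catches up.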
74 changes: 52 additions & 22 deletions package/src/components/elements/TranscriptOverlay.tsx
@@ -1,11 +1,13 @@
 "use client";
 
 import { cn } from "@/lib/utils";
-import { type BotTTSTextData, RTVIEvent } from "@pipecat-ai/client-js";
+import { BotOutputData, BotReadyData, RTVIEvent } from "@pipecat-ai/client-js";
 import {
   usePipecatClientTransportState,
   useRTVIClientEvent,
 } from "@pipecat-ai/client-react";
+import { useBotMessages } from "@/hooks/useBotMessages";
+import { isMinVersion } from "@/utils/version";
 import { cva } from "class-variance-authority";
 import { useCallback, useState } from "react";

@@ -181,25 +183,63 @@ export const TranscriptOverlay = ({
 }: TranscriptOverlayProps) => {
   const [transcript, setTranscript] = useState<string[]>([]);
   const [turnEnd, setIsTurnEnd] = useState(false);
+  const [botOutputSupported, setBotOutputSupported] = useState(false);
   const transportState = usePipecatClientTransportState();
 
-  useRTVIClientEvent(
-    RTVIEvent.BotTtsText,
-    useCallback(
-      (event: BotTTSTextData) => {
+  // Detect BotOutput support from BotReady event
+  useRTVIClientEvent(RTVIEvent.BotReady, (botData: BotReadyData) => {
+    const rtviVersion = botData.version;
+    const supportsBotOutput = isMinVersion(rtviVersion, [1, 1, 0]);
+    setBotOutputSupported(supportsBotOutput);
+  });
+
+  // Handle BotOutput events (when supported) - only word-level spoken chunks
+  useRTVIClientEvent(RTVIEvent.BotOutput, (data: BotOutputData) => {
+    if (participant === "local" || !botOutputSupported) {
+      return;
+    }
+
+    // Only process word-level outputs that have been spoken
+    // These provide real-time word-by-word streaming for karaoke-like UI
+    if (data.aggregated_by === "word" && data.spoken === true && data.text) {
+      if (turnEnd) {
+        setTranscript([]);
+        setIsTurnEnd(false);
+      }
+
+      setTranscript((prev) => [...prev, data.text]);
+    }
+  });
+
+  // Handle legacy TTS events (when BotOutput not supported)
+  useBotMessages(
+    {
+      onBotMessageChunk: (type, text) => {
         if (participant === "local") {
           return;
         }
 
-        if (turnEnd) {
-          setTranscript([]);
-          setIsTurnEnd(false);
-        }
+        // Only process TTS chunks (spoken content)
+        if (type === "tts") {
+          if (turnEnd) {
+            setTranscript([]);
+            setIsTurnEnd(false);
+          }
 
-        setTranscript((prev) => [...prev, event.text]);
+          setTranscript((prev) => [...prev, text]);
+        }
       },
-      [turnEnd, participant],
-    ),
+      onBotMessageEnded: (type) => {
+        if (participant === "local") {
+          return;
+        }
+        // Only handle TTS ended events
+        if (type === "tts") {
+          setIsTurnEnd(true);
+        }
+      },
+    },
+    botOutputSupported,
   );
 
   useRTVIClientEvent(
@@ -212,16 +252,6 @@
     }, [participant]),
   );
 
-  useRTVIClientEvent(
-    RTVIEvent.BotTtsStopped,
-    useCallback(() => {
-      if (participant === "local") {
-        return;
-      }
-      setIsTurnEnd(true);
-    }, [participant]),
-  );
 
   if (transcript.length === 0 || transportState !== "ready") {
     return null;
   }
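Both TranscriptOverlay and ConversationProvider fall back to useBotMessages when BotOutput isn't available. The hook lives in "@/hooks/useBotMessages" and isn't part of this diff; judging from its two call sites, a sketch might look like the following. The callback names come from the diff, but the internals and the meaning of the second argument are inferred, not confirmed.

// Hypothetical sketch of "@/hooks/useBotMessages": bridge the legacy
// BotLlm*/BotTts* events into started/chunk/ended callbacks, and go inert
// once the caller reports that BotOutput is supported.
import { RTVIEvent } from "@pipecat-ai/client-js";
import { useRTVIClientEvent } from "@pipecat-ai/client-react";

type BotMessageType = "llm" | "tts";

interface BotMessageCallbacks {
  onBotMessageStarted?: (type: BotMessageType) => void;
  onBotMessageChunk?: (type: BotMessageType, text: string) => void;
  onBotMessageEnded?: (type: BotMessageType) => void;
}

export function useBotMessages(
  { onBotMessageStarted, onBotMessageChunk, onBotMessageEnded }: BotMessageCallbacks,
  botOutputSupported: boolean,
) {
  useRTVIClientEvent(RTVIEvent.BotLlmStarted, () => {
    if (!botOutputSupported) onBotMessageStarted?.("llm");
  });
  useRTVIClientEvent(RTVIEvent.BotLlmText, (data) => {
    if (!botOutputSupported) onBotMessageChunk?.("llm", data.text);
  });
  useRTVIClientEvent(RTVIEvent.BotLlmStopped, () => {
    if (!botOutputSupported) onBotMessageEnded?.("llm");
  });
  useRTVIClientEvent(RTVIEvent.BotTtsStarted, () => {
    if (!botOutputSupported) onBotMessageStarted?.("tts");
  });
  useRTVIClientEvent(RTVIEvent.BotTtsText, (data) => {
    if (!botOutputSupported) onBotMessageChunk?.("tts", data.text);
  });
  useRTVIClientEvent(RTVIEvent.BotTtsStopped, () => {
    if (!botOutputSupported) onBotMessageEnded?.("tts");
  });
}

This shape would explain both call sites: ConversationProvider passes every chunk type through to the store, while TranscriptOverlay filters for type === "tts" so only spoken text reaches the overlay.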