diff --git a/CHANGELOG.md b/CHANGELOG.md index ec80ebc..d7d39bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,19 @@ +# 1.1.0 + +- Update signature of `Transport.deserializeConnectParams()` to also take the startBot `APIRequest`. + +- Implemented support for the new `botOutput` RTVI message. This message is now the preferred + way of communicating a holistic view of what the bot "says". It includes a `spoken` field, + indicating whether the text has been spoken along with a field, `aggregated_by`, to indicate what + the text represents. By default, with TTS services that support word-by-word output, you can + expect two `aggregated_by` values for `botOutput` events: `"sentence"` and `"word"`. All + sentence events are guaranteed to be in order, while word events come in at the time of being + spoken. This allows for building karaoke-like UIs where the sentence is displayed and each word + is highlighted as it's spoken. This event also provides continuity across bot output even when + the TTS is skipped or does not exist. And if your pipeline takes advantage of customizing how + the LLM text is aggregated, you can handle custom `aggregated_by` fields, like `"code"` or + `"address"` or `"url"`, allowing the server to do the parsing. + # 1.0.2 - Added new `sendText()` method to support the new RTVI `send-text` event. 
The method diff --git a/pipecat-client-android/build.gradle.kts b/pipecat-client-android/build.gradle.kts index ae6acd2..b3e01a9 100644 --- a/pipecat-client-android/build.gradle.kts +++ b/pipecat-client-android/build.gradle.kts @@ -60,7 +60,7 @@ publishing { register("release") { groupId = "ai.pipecat" artifactId = "client" - version = "1.0.2" + version = "1.1.0" pom { name.set("Pipecat Client") diff --git a/pipecat-client-android/src/main/java/ai/pipecat/client/PipecatClient.kt b/pipecat-client-android/src/main/java/ai/pipecat/client/PipecatClient.kt index 199610c..2117a15 100644 --- a/pipecat-client-android/src/main/java/ai/pipecat/client/PipecatClient.kt +++ b/pipecat-client-android/src/main/java/ai/pipecat/client/PipecatClient.kt @@ -12,6 +12,7 @@ import ai.pipecat.client.transport.Transport import ai.pipecat.client.transport.TransportContext import ai.pipecat.client.types.APIRequest import ai.pipecat.client.types.AppendToContextResultData +import ai.pipecat.client.types.BotOutputData import ai.pipecat.client.types.BotReadyData import ai.pipecat.client.types.DataMessage import ai.pipecat.client.types.LLMContextMessage @@ -77,11 +78,10 @@ open class PipecatClient, ConnectParams override val thread = this@PipecatClient.thread override fun onConnectionEnd() { - thread.runOnThread { - responseWaiters.clearAll() - connection?.ready?.resolveErr(RTVIError.OperationCancelled) - connection = null - } + thread.assertCurrent() + responseWaiters.clearAll() + connection?.ready?.resolveErr(RTVIError.OperationCancelled) + connection = null } override fun onMessage(msg: MsgServerToClient) = thread.runOnThread { @@ -168,6 +168,13 @@ open class PipecatClient, ConnectParams callbacks.onBotLLMText(data) } + MsgServerToClient.Type.BotOutput -> { + val data: BotOutputData = + JSON_INSTANCE.decodeFromJsonElement(msg.data) + + callbacks.onBotOutput(data) + } + MsgServerToClient.Type.BotTtsText -> { val data: MsgServerToClient.Data.BotTTSTextData = 
JSON_INSTANCE.decodeFromJsonElement(msg.data) @@ -289,7 +296,7 @@ open class PipecatClient, ConnectParams postResult.mapError { RTVIError.HttpError(it) }.chain { try { - resolvedPromiseOk(thread, transport.deserializeConnectParams(it)) + resolvedPromiseOk(thread, transport.deserializeConnectParams(it, startBotParams)) } catch (e: Exception) { resolvedPromiseErr(thread, RTVIError.ExceptionThrown(e)) } diff --git a/pipecat-client-android/src/main/java/ai/pipecat/client/PipecatEventCallbacks.kt b/pipecat-client-android/src/main/java/ai/pipecat/client/PipecatEventCallbacks.kt index 13c8fb4..610e3f0 100644 --- a/pipecat-client-android/src/main/java/ai/pipecat/client/PipecatEventCallbacks.kt +++ b/pipecat-client-android/src/main/java/ai/pipecat/client/PipecatEventCallbacks.kt @@ -2,6 +2,7 @@ package ai.pipecat.client import ai.pipecat.client.transport.MsgServerToClient import ai.pipecat.client.types.BotLLMSearchResponseData +import ai.pipecat.client.types.BotOutputData import ai.pipecat.client.types.BotReadyData import ai.pipecat.client.types.LLMFunctionCallData import ai.pipecat.client.types.MediaDeviceInfo @@ -116,6 +117,7 @@ abstract class PipecatEventCallbacks { /** * Invoked when bot transcript data is available. */ + @Deprecated("onBotTranscript callback deprecated, please use onBotOutput instead") open fun onBotTranscript(text: String) {} /** @@ -133,6 +135,11 @@ abstract class PipecatEventCallbacks { */ open fun onBotLLMText(data: MsgServerToClient.Data.BotLLMTextData) {} + /** + * Invoked when the bot emits output. + */ + open fun onBotOutput(data: BotOutputData) {} + /** * Invoked when text is spoken by the bot. 
*/ diff --git a/pipecat-client-android/src/main/java/ai/pipecat/client/transport/MsgServerToClient.kt b/pipecat-client-android/src/main/java/ai/pipecat/client/transport/MsgServerToClient.kt index 73058f8..3d6240b 100644 --- a/pipecat-client-android/src/main/java/ai/pipecat/client/transport/MsgServerToClient.kt +++ b/pipecat-client-android/src/main/java/ai/pipecat/client/transport/MsgServerToClient.kt @@ -34,6 +34,7 @@ data class MsgServerToClient( const val BotLlmText = "bot-llm-text" // Streaming chunk/word, directly after LLM const val BotLlmStarted = "bot-llm-started" const val BotLlmStopped = "bot-llm-stopped" + const val BotOutput = "bot-output" const val BotTtsText = "bot-tts-text" const val BotTtsStarted = "bot-tts-started" const val BotTtsStopped = "bot-tts-stopped" @@ -55,5 +56,6 @@ data class MsgServerToClient( data class BotTTSTextData( val text: String ) + } } \ No newline at end of file diff --git a/pipecat-client-android/src/main/java/ai/pipecat/client/transport/Transport.kt b/pipecat-client-android/src/main/java/ai/pipecat/client/transport/Transport.kt index 50738c0..b20d668 100644 --- a/pipecat-client-android/src/main/java/ai/pipecat/client/transport/Transport.kt +++ b/pipecat-client-android/src/main/java/ai/pipecat/client/transport/Transport.kt @@ -2,6 +2,7 @@ package ai.pipecat.client.transport import ai.pipecat.client.result.Future import ai.pipecat.client.result.RTVIError +import ai.pipecat.client.types.APIRequest import ai.pipecat.client.types.MediaDeviceId import ai.pipecat.client.types.MediaDeviceInfo import ai.pipecat.client.types.Tracks @@ -14,7 +15,10 @@ abstract class Transport { abstract fun initialize(ctx: TransportContext) - abstract fun deserializeConnectParams(json: String): ConnectParams + abstract fun deserializeConnectParams( + json: String, + startBotRequest: APIRequest + ): ConnectParams abstract fun initDevices(): Future abstract fun release() diff --git 
a/pipecat-client-android/src/main/java/ai/pipecat/client/types/BotOutputData.kt b/pipecat-client-android/src/main/java/ai/pipecat/client/types/BotOutputData.kt new file mode 100644 index 0000000..c9c235b --- /dev/null +++ b/pipecat-client-android/src/main/java/ai/pipecat/client/types/BotOutputData.kt @@ -0,0 +1,18 @@ +package ai.pipecat.client.types + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +/** + * Streaming bot output tokens/chunks. + * + * Example: + * {"text":"your","spoken":true,"aggregated_by":"word"} + */ +@Serializable +data class BotOutputData( + val text: String, + val spoken: Boolean, + @SerialName("aggregated_by") + val aggregatedBy: String +) \ No newline at end of file