diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index fcddd24f0..38ad480b7 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -64,7 +64,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.24.3" + go-version: "1.25.5" - name: Set up Python uses: actions/setup-python@v5 diff --git a/.github/workflows/release-pipeline.yml b/.github/workflows/release-pipeline.yml index b89e7e729..659392f48 100644 --- a/.github/workflows/release-pipeline.yml +++ b/.github/workflows/release-pipeline.yml @@ -81,7 +81,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.24.3" + go-version: "1.25.5" - name: Configure Git run: | git config user.name "GitHub Actions Bot" @@ -139,7 +139,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.24.3" + go-version: "1.25.5" - name: Configure Git run: | @@ -215,7 +215,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.24.3" + go-version: "1.25.5" - name: Configure Git run: | @@ -294,7 +294,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.24.3" + go-version: "1.25.5" - name: Set up Node.js uses: actions/setup-node@v4 diff --git a/.github/workflows/snyk.yml b/.github/workflows/snyk.yml index 1cd876a39..fe2111bd4 100644 --- a/.github/workflows/snyk.yml +++ b/.github/workflows/snyk.yml @@ -2,9 +2,9 @@ name: Snyk checks on: push: - branches: [main, master, '**/*'] + branches: [main, master, "**/*"] pull_request: - branches: ['**/*'] + branches: ["**/*"] workflow_dispatch: permissions: @@ -44,17 +44,17 @@ jobs: - name: Setup Node (for UI) uses: actions/setup-node@v4 with: - node-version: '20' + node-version: "20" - name: Setup Python (for tests tooling) uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: "3.11" - name: Setup Go uses: actions/setup-go@v5 with: - go-version: '1.24.3' + go-version: "1.25.5" - name: Install Snyk CLI uses: snyk/actions/setup@master @@ -82,22 +82,22 @@ jobs: - name: Setup Node (for UI) uses: actions/setup-node@v4 with: - node-version: '20' + node-version: "20" - name: Setup Python (for tests tooling) uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: "3.11" - name: Setup Python (for tests tooling) uses: actions/setup-python@v5 with: - python-version: '3.11' - cache: 'pip' + python-version: "3.11" + cache: "pip" cache-dependency-path: | tests/integrations/requirements.txt tests/governance/requirements.txt - + - name: Install Python dependencies (tests tooling) run: | python -m pip install --disable-pip-version-check \ @@ -107,7 +107,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: '1.24.3' + go-version: "1.25.5" - name: Build run: make build diff --git a/Makefile b/Makefile index 1b218f15a..c56679d40 100644 --- a/Makefile +++ b/Makefile @@ -213,7 +213,7 @@ _build-with-docker: # Internal target for Docker-based cross-compilation -e GOOS=$(TARGET_OS) \ -e GOARCH=$(TARGET_ARCH) \ $(if $(LOCAL),,-e GOWORK=off) \ - golang:1.24.3-alpine3.22 \ + golang:1.25.5-alpine3.22 \ sh -c "apk add --no-cache gcc musl-dev && \ go build \ -ldflags='-w -s -X main.Version=v$(VERSION)' \ @@ -230,7 +230,7 @@ _build-with-docker: # Internal target for Docker-based cross-compilation -e GOOS=$(TARGET_OS) \ -e GOARCH=$(TARGET_ARCH) \ $(if $(LOCAL),,-e GOWORK=off) \ - golang:1.24.3-alpine3.22 \ + golang:1.25.5-alpine3.22 \ sh -c "apk add --no-cache gcc musl-dev && \ go build \ -ldflags='-w -s -extldflags "-static" -X main.Version=v$(VERSION)' \ diff --git a/core/internal/testutil/account.go b/core/internal/testutil/account.go index 7bfd3435b..a39ba3dba 100644 --- a/core/internal/testutil/account.go +++ b/core/internal/testutil/account.go @@ -53,6 +53,7 @@ type TestScenarios struct { FileDelete bool // File API delete functionality FileContent bool // File API content download functionality FileBatchInput bool // Whether batch create supports file-based input (InputFileID) + ChatAudio bool // Chat completion with audio input/output functionality } // ComprehensiveTestConfig extends TestConfig with additional scenarios @@ -66,6 +67,7 @@ type ComprehensiveTestConfig struct { EmbeddingModel string TranscriptionModel string SpeechSynthesisModel string + ChatAudioModel string Scenarios TestScenarios Fallbacks []schemas.Fallback // for chat, responses, image and reasoning tests TextCompletionFallbacks []schemas.Fallback // for text completion tests @@ -73,6 +75,8 @@ type ComprehensiveTestConfig struct { SpeechSynthesisFallbacks []schemas.Fallback // for speech synthesis tests EmbeddingFallbacks []schemas.Fallback // for embedding tests SkipReason string // Reason to skip certain tests + ExternalTTSProvider schemas.ModelProvider // External TTS provider to use for testing + ExternalTTSModel string // External TTS model to use for testing BatchExtraParams map[string]interface{} // Extra params for batch operations (e.g., role_arn, output_s3_uri for Bedrock) FileExtraParams map[string]interface{} // Extra params for file operations (e.g., s3_bucket for Bedrock) } @@ -161,25 +165,25 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx *context.Context }, }, }, - { - Models: []string{}, - Weight: 1.0, - BedrockKeyConfig: &schemas.BedrockKeyConfig{ - AccessKey: os.Getenv("AWS_ACCESS_KEY_ID"), - SecretKey: os.Getenv("AWS_SECRET_ACCESS_KEY"), - SessionToken: bifrost.Ptr(os.Getenv("AWS_SESSION_TOKEN")), - Region: bifrost.Ptr(getEnvWithDefault("AWS_REGION", "us-east-1")), - ARN: bifrost.Ptr(os.Getenv("AWS_BEDROCK_ARN")), - Deployments: map[string]string{ - "claude-3.5-sonnet": "anthropic.claude-3-5-sonnet-20240620-v1:0", - "claude-3.7-sonnet": "us.anthropic.claude-3-7-sonnet-20250219-v1:0", - "claude-4-sonnet": "global.anthropic.claude-sonnet-4-20250514-v1:0", - "claude-4.5-sonnet": "global.anthropic.claude-sonnet-4-5-20250929-v1:0", - "claude-4.5-haiku": "global.anthropic.claude-haiku-4-5-20251001-v1:0", + { + Models: []string{}, + Weight: 1.0, + BedrockKeyConfig: &schemas.BedrockKeyConfig{ + AccessKey: os.Getenv("AWS_ACCESS_KEY_ID"), + SecretKey: os.Getenv("AWS_SECRET_ACCESS_KEY"), + SessionToken: bifrost.Ptr(os.Getenv("AWS_SESSION_TOKEN")), + Region: bifrost.Ptr(getEnvWithDefault("AWS_REGION", "us-east-1")), + ARN: bifrost.Ptr(os.Getenv("AWS_BEDROCK_ARN")), + Deployments: map[string]string{ + "claude-3.5-sonnet": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "claude-3.7-sonnet": "us.anthropic.claude-3-7-sonnet-20250219-v1:0", + "claude-4-sonnet": "global.anthropic.claude-sonnet-4-20250514-v1:0", + "claude-4.5-sonnet": "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + "claude-4.5-haiku": "global.anthropic.claude-haiku-4-5-20251001-v1:0", + }, }, + UseForBatchAPI: bifrost.Ptr(true), }, - UseForBatchAPI: bifrost.Ptr(true), - }, { Models: []string{"cohere.embed-v4:0"}, Weight: 1.0, @@ -218,6 +222,20 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx *context.Context }, UseForBatchAPI: bifrost.Ptr(true), }, + { + Value: os.Getenv("AZURE_API_KEY"), + Models: []string{}, + Weight: 1.0, + AzureKeyConfig: &schemas.AzureKeyConfig{ + Endpoint: os.Getenv("AZURE_ENDPOINT"), + APIVersion: bifrost.Ptr("2025-01-01-preview"), + Deployments: map[string]string{ + "whisper": "whisper", + "gpt-4o-mini-tts": "gpt-4o-mini-tts", + "gpt-4o-mini-audio-preview": "gpt-4o-mini-audio-preview", + }, + }, + }, }, nil case schemas.Vertex: return []schemas.Key{ @@ -587,6 +605,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{ PromptCachingModel: "gpt-4.1", TranscriptionModel: "whisper-1", SpeechSynthesisModel: "tts-1", + ChatAudioModel: "gpt-4o-mini-audio-preview", Scenarios: TestScenarios{ TextCompletion: false, // Not supported TextCompletionStream: false, // Not supported @@ -618,6 +637,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{ FileRetrieve: true, // OpenAI supports file API FileDelete: true, // OpenAI supports file API FileContent: true, // OpenAI supports file API + ChatAudio: true, // OpenAI supports chat audio }, Fallbacks: []schemas.Fallback{ {Provider: schemas.Anthropic, Model: "claude-3-7-sonnet-20250219"}, @@ -725,9 +745,12 @@ var AllProviderConfigs = []ComprehensiveTestConfig{ }, }, { - Provider: schemas.Azure, - ChatModel: "gpt-4o", - TextModel: "", // Azure doesn't support text completion in newer models + Provider: schemas.Azure, + ChatModel: "gpt-4o", + TextModel: "", // Azure doesn't support text completion in newer models + ChatAudioModel: "gpt-4o-mini-audio-preview", + TranscriptionModel: "whisper-1", + SpeechSynthesisModel: "gpt-4o-mini-tts", Scenarios: TestScenarios{ TextCompletion: false, // Not supported SimpleChat: true, @@ -741,10 +764,10 @@ var AllProviderConfigs = []ComprehensiveTestConfig{ ImageBase64: true, MultipleImages: true, CompleteEnd2End: true, - SpeechSynthesis: false, // Not supported yet - SpeechSynthesisStream: false, // Not supported yet - Transcription: false, // Not supported yet - TranscriptionStream: false, // Not supported yet + SpeechSynthesis: true, // Supported via gpt-4o-mini-tts + SpeechSynthesisStream: true, // Supported via gpt-4o-mini-tts + Transcription: true, // Supported via whisper-1 + TranscriptionStream: false, // Not properly supported yet by Azure Embedding: true, ListModels: true, BatchCreate: true, // Azure supports batch API @@ -757,6 +780,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{ FileRetrieve: true, // Azure supports file API FileDelete: true, // Azure supports file API FileContent: true, // Azure supports file API + ChatAudio: true, // Azure supports chat audio }, Fallbacks: []schemas.Fallback{ {Provider: schemas.OpenAI, Model: "gpt-4o-mini"}, diff --git a/core/internal/testutil/chat_audio.go b/core/internal/testutil/chat_audio.go new file mode 100644 index 000000000..98cc6de05 --- /dev/null +++ b/core/internal/testutil/chat_audio.go @@ -0,0 +1,318 @@ +package testutil + +import ( + "context" + "os" + "strings" + "testing" + + bifrost "github.com/maximhq/bifrost/core" + "github.com/maximhq/bifrost/core/schemas" +) + +// RunChatAudioTest executes the chat audio test scenario +func RunChatAudioTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) { + if !testConfig.Scenarios.ChatAudio || testConfig.ChatAudioModel == "" { + t.Logf("Chat audio not supported for provider %s", testConfig.Provider) + return + } + + t.Run("ChatAudio", func(t *testing.T) { + if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { + t.Parallel() + } + + // Load sample audio file and encode as base64 + encodedAudio, err := GetSampleAudioBase64() + if err != nil { + t.Fatalf("Failed to load sample audio file: %v", err) + } + + // Create chat message with audio input + chatMessages := []schemas.ChatMessage{ + CreateAudioChatMessage("Describe in detail the spoken audio input.", encodedAudio, "mp3"), + } + + // Use retry framework for audio requests + retryConfig := GetTestRetryConfigForScenario("ChatAudio", testConfig) + retryContext := TestRetryContext{ + ScenarioName: "ChatAudio", + ExpectedBehavior: map[string]interface{}{ + "should_process_audio": true, + "should_return_audio": true, + "should_return_transcript": true, + }, + TestMetadata: map[string]interface{}{ + "provider": testConfig.Provider, + "model": testConfig.ChatAudioModel, + }, + } + + // Create Chat Completions retry config + chatRetryConfig := ChatRetryConfig{ + MaxAttempts: retryConfig.MaxAttempts, + BaseDelay: retryConfig.BaseDelay, + MaxDelay: retryConfig.MaxDelay, + Conditions: []ChatRetryCondition{}, + OnRetry: retryConfig.OnRetry, + OnFinalFail: retryConfig.OnFinalFail, + } + + // Test Chat Completions API with audio + chatOperation := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) { + chatReq := &schemas.BifrostChatRequest{ + Provider: testConfig.Provider, + Model: testConfig.ChatAudioModel, + Input: chatMessages, + Params: &schemas.ChatParameters{ + Modalities: []string{"text", "audio"}, + Audio: &schemas.ChatAudioParameters{ + Voice: "alloy", + Format: "wav", // output format + }, + MaxCompletionTokens: bifrost.Ptr(200), + }, + Fallbacks: testConfig.Fallbacks, + } + response, err := client.ChatCompletionRequest(ctx, chatReq) + if err != nil { + return nil, err + } + if response != nil { + return response, nil + } + return nil, &schemas.BifrostError{ + IsBifrostError: true, + Error: &schemas.ErrorField{ + Message: "No chat response returned", + }, + } + } + + expectations := GetExpectationsForScenario("ChatAudio", testConfig, map[string]interface{}{}) + expectations = ModifyExpectationsForProvider(expectations, testConfig.Provider) + + chatResponse, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, "ChatAudio", chatOperation) + + // Check that the request succeeded + if chatError != nil { + t.Fatalf("❌ Chat Completions API failed: %s", GetErrorMessage(chatError)) + } + + if chatResponse == nil { + t.Fatal("❌ Chat response should not be nil") + } + + if len(chatResponse.Choices) == 0 { + t.Fatal("❌ Chat response should have at least one choice") + } + + choice := chatResponse.Choices[0] + if choice.ChatNonStreamResponseChoice == nil { + t.Fatal("❌ Expected non-streaming response choice") + } + + message := choice.ChatNonStreamResponseChoice.Message + if message == nil { + t.Fatal("❌ Message should not be nil") + } + + // Check for audio in the response + if message.ChatAssistantMessage == nil { + t.Fatal("❌ Expected ChatAssistantMessage") + } + + if message.ChatAssistantMessage.Audio == nil { + t.Fatal("❌ Expected audio in response (choices[0].message.audio should be present)") + } + + audio := message.ChatAssistantMessage.Audio + if audio.Data == "" { + t.Error("❌ Expected audio.data to be present in response") + } else { + t.Logf("✅ Audio data present in response (length: %d)", len(audio.Data)) + } + + if audio.Transcript == "" { + t.Error("❌ Expected audio.transcript to be present in response") + } else { + t.Logf("✅ Audio transcript present in response: %s", audio.Transcript) + } + + // Log the content if available + if message.Content != nil && message.Content.ContentStr != nil { + t.Logf("✅ Chat response content: %s", *message.Content.ContentStr) + } + + t.Logf("🎉 ChatAudio test passed!") + }) +} + +// RunChatAudioStreamTest executes the chat audio streaming test scenario +func RunChatAudioStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) { + if !testConfig.Scenarios.ChatAudio || testConfig.ChatAudioModel == "" { + t.Logf("Chat audio streaming not supported for provider %s", testConfig.Provider) + return + } + + t.Run("ChatAudioStream", func(t *testing.T) { + if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { + t.Parallel() + } + + // Load sample audio file and encode as base64 + encodedAudio, err := GetSampleAudioBase64() + if err != nil { + t.Fatalf("Failed to load sample audio file: %v", err) + } + + // Create chat message with audio input + chatMessages := []schemas.ChatMessage{ + CreateAudioChatMessage("Describe in detail the spoken audio input.", encodedAudio, "mp3"), + } + + // Use retry framework for audio streaming requests + retryConfig := StreamingRetryConfig() + retryContext := TestRetryContext{ + ScenarioName: "ChatAudioStream", + ExpectedBehavior: map[string]interface{}{ + "should_process_audio": true, + "should_return_audio": true, + "should_return_transcript": true, + }, + TestMetadata: map[string]interface{}{ + "provider": testConfig.Provider, + "model": testConfig.ChatAudioModel, + }, + } + + // Test Chat Completions Stream API with audio + chatReq := &schemas.BifrostChatRequest{ + Provider: testConfig.Provider, + Model: testConfig.ChatAudioModel, + Input: chatMessages, + Params: &schemas.ChatParameters{ + Modalities: []string{"text", "audio"}, + Audio: &schemas.ChatAudioParameters{ + Voice: "alloy", + Format: "pcm16", // output format + }, + }, + Fallbacks: testConfig.Fallbacks, + } + + responseChannel, bifrostErr := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStream, *schemas.BifrostError) { + return client.ChatCompletionStreamRequest(ctx, chatReq) + }) + + // Enhanced error handling + if bifrostErr != nil { + t.Fatalf("Chat audio stream request failed: %v", bifrostErr) + } + if responseChannel == nil { + t.Fatal("Response channel should not be nil") + } + + // Accumulate stream chunks + var chunks []*schemas.BifrostStream + var audioData strings.Builder + var audioTranscript strings.Builder + var audioID string + var audioExpiresAt int + var lastUsage *schemas.BifrostLLMUsage + + for chunk := range responseChannel { + chunks = append(chunks, chunk) + + if chunk.BifrostError != nil && chunk.BifrostError.Error != nil { + t.Fatalf("Stream error: %v", chunk.BifrostError.Error) + } + + if chunk.BifrostChatResponse != nil { + if len(chunk.BifrostChatResponse.Choices) > 0 { + choice := chunk.BifrostChatResponse.Choices[0] + + // Accumulate text content + if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil { + delta := choice.ChatStreamResponseChoice.Delta + + // Accumulate audio data from delta + if delta.Audio != nil { + if delta.Audio.Data != "" { + audioData.WriteString(delta.Audio.Data) + } + if delta.Audio.Transcript != "" { + audioTranscript.WriteString(delta.Audio.Transcript) + } + if delta.Audio.ID != "" { + audioID = delta.Audio.ID + } + if delta.Audio.ExpiresAt != 0 { + audioExpiresAt = delta.Audio.ExpiresAt + } + } + } + } + + // Capture final usage + if chunk.BifrostChatResponse.Usage != nil { + lastUsage = chunk.BifrostChatResponse.Usage + } + } + } + + // Validate that we received chunks + if len(chunks) == 0 { + t.Fatal("❌ Expected to receive stream chunks") + } + + t.Logf("✅ Received %d stream chunks", len(chunks)) + + // Validate accumulated audio data (check overall, not per-chunk) + accumulatedAudioData := audioData.String() + accumulatedTranscript := audioTranscript.String() + + // Check overall: at least one of audio data or transcript should be present + if accumulatedAudioData == "" && accumulatedTranscript == "" { + t.Fatal("❌ Expected overall audio data or transcript to be present in stream chunks") + } + + if accumulatedAudioData != "" { + t.Logf("✅ Accumulated audio data (length: %d)", len(accumulatedAudioData)) + } else { + t.Logf("⚠️ No accumulated audio data found") + } + + if accumulatedTranscript != "" { + t.Logf("✅ Accumulated audio transcript: %s", accumulatedTranscript) + } else { + t.Logf("⚠️ No accumulated audio transcript found") + } + + // Validate audio metadata + if audioID != "" { + t.Logf("✅ Audio ID: %s", audioID) + } + if audioExpiresAt != 0 { + t.Logf("✅ Audio expires at: %d", audioExpiresAt) + } + + // Validate usage if available + if lastUsage != nil { + t.Logf("✅ Token usage - Prompt: %d, Completion: %d, Total: %d", + lastUsage.PromptTokens, + lastUsage.CompletionTokens, + lastUsage.TotalTokens) + + // Check for audio tokens + if lastUsage.PromptTokensDetails != nil && lastUsage.PromptTokensDetails.AudioTokens > 0 { + t.Logf("✅ Input audio tokens: %d", lastUsage.PromptTokensDetails.AudioTokens) + } + if lastUsage.CompletionTokensDetails != nil && lastUsage.CompletionTokensDetails.AudioTokens > 0 { + t.Logf("✅ Output audio tokens: %d", lastUsage.CompletionTokensDetails.AudioTokens) + } + } + + t.Logf("🎉 ChatAudioStream test passed!") + }) +} diff --git a/core/internal/testutil/scenarios/media/sample.mp3 b/core/internal/testutil/scenarios/media/sample.mp3 new file mode 100644 index 000000000..c2ef055f1 Binary files /dev/null and b/core/internal/testutil/scenarios/media/sample.mp3 differ diff --git a/core/internal/testutil/tests.go b/core/internal/testutil/tests.go index f9a7401e5..aa0b3eaf0 100644 --- a/core/internal/testutil/tests.go +++ b/core/internal/testutil/tests.go @@ -64,6 +64,8 @@ func RunAllComprehensiveTests(t *testing.T, client *bifrost.Bifrost, ctx context RunFileContentTest, RunFileUnsupportedTest, RunFileAndBatchIntegrationTest, + RunChatAudioTest, + RunChatAudioStreamTest, } // Execute all test scenarios @@ -116,6 +118,8 @@ func printTestSummary(t *testing.T, testConfig ComprehensiveTestConfig) { {"FileContent", testConfig.Scenarios.FileContent}, {"FileUnsupported", !testConfig.Scenarios.FileUpload && !testConfig.Scenarios.FileList && !testConfig.Scenarios.FileRetrieve && !testConfig.Scenarios.FileDelete && !testConfig.Scenarios.FileContent}, {"FileAndBatchIntegration", testConfig.Scenarios.FileBatchInput}, + {"ChatAudio", testConfig.Scenarios.ChatAudio && testConfig.ChatAudioModel != ""}, + {"ChatAudioStream", testConfig.Scenarios.ChatAudio && testConfig.ChatAudioModel != ""}, } supported := 0 diff --git a/core/internal/testutil/transcription.go b/core/internal/testutil/transcription.go index 0e5c1d18b..a577e7905 100644 --- a/core/internal/testutil/transcription.go +++ b/core/internal/testutil/transcription.go @@ -59,11 +59,21 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Step 1: Generate TTS audio - voice := GetProviderVoice(testConfig.Provider, tc.voiceType) + voice := GetProviderVoice(speechSynthesisProvider, tc.voiceType) ttsRequest := &schemas.BifrostSpeechRequest{ - Provider: testConfig.Provider, - Model: testConfig.SpeechSynthesisModel, + Provider: speechSynthesisProvider, + Model: speechSynthesisModel, Input: &schemas.SpeechInput{ Input: tc.text, }, @@ -84,8 +94,8 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con "should_generate_audio": true, }, TestMetadata: map[string]interface{}{ - "provider": testConfig.Provider, - "model": testConfig.SpeechSynthesisModel, + "provider": speechSynthesisProvider, + "model": speechSynthesisModel, "format": tc.format, }, } @@ -209,8 +219,18 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Use the utility function to generate audio - audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, tc.text, "primary", "mp3") + audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, tc.text, "primary", "mp3") // Test transcription request := &schemas.BifrostTranscriptionRequest{ @@ -292,8 +312,18 @@ func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx con t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Generate fresh audio for each test to avoid race conditions and ensure validity - audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextBasic, "primary", "mp3") + audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3") formatCopy := format request := &schemas.BifrostTranscriptionRequest{ @@ -360,8 +390,18 @@ func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx con t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Generate audio for custom parameters test - audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextMedium, "secondary", "mp3") + audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextMedium, "secondary", "mp3") // Test with custom parameters and temperature request := &schemas.BifrostTranscriptionRequest{ @@ -432,8 +472,18 @@ func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx con t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Generate fresh audio for each test to avoid race conditions and ensure validity - audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextBasic, "primary", "mp3") + audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3") langCopy := lang request := &schemas.BifrostTranscriptionRequest{ diff --git a/core/internal/testutil/transcription_stream.go b/core/internal/testutil/transcription_stream.go index 25d4e31dd..1f27dcf1f 100644 --- a/core/internal/testutil/transcription_stream.go +++ b/core/internal/testutil/transcription_stream.go @@ -9,7 +9,6 @@ import ( "testing" "time" - bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" ) @@ -63,11 +62,21 @@ func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx conte t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Step 1: Generate TTS audio - voice := GetProviderVoice(testConfig.Provider, tc.voiceType) + voice := GetProviderVoice(speechSynthesisProvider, tc.voiceType) ttsRequest := &schemas.BifrostSpeechRequest{ - Provider: testConfig.Provider, - Model: testConfig.SpeechSynthesisModel, + Provider: speechSynthesisProvider, + Model: speechSynthesisModel, Input: &schemas.SpeechInput{ Input: tc.text, }, @@ -88,8 +97,8 @@ func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx conte "should_generate_audio": true, }, TestMetadata: map[string]interface{}{ - "provider": testConfig.Provider, - "model": testConfig.SpeechSynthesisModel, + "provider": speechSynthesisProvider, + "model": speechSynthesisModel, }, } ttsExpectations := SpeechExpectations(100) @@ -335,8 +344,18 @@ func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, c t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Generate audio for streaming test - audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextBasic, "primary", "mp3") + audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3") // Test streaming with JSON format request := &schemas.BifrostTranscriptionRequest{ @@ -420,8 +439,18 @@ func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, c t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Generate audio for language streaming tests - audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextBasic, "primary", "mp3") + audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3") // Test streaming with different language hints (only English for now) languages := []string{"en"} @@ -509,8 +538,18 @@ func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, c t.Parallel() } + speechSynthesisProvider := testConfig.Provider + if testConfig.ExternalTTSProvider != "" { + speechSynthesisProvider = testConfig.ExternalTTSProvider + } + + speechSynthesisModel := testConfig.SpeechSynthesisModel + if testConfig.ExternalTTSModel != "" { + speechSynthesisModel = testConfig.ExternalTTSModel + } + // Generate audio for custom prompt streaming test - audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextTechnical, "tertiary", "mp3") + audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextTechnical, "tertiary", "mp3") // Test streaming with custom prompt for context request := &schemas.BifrostTranscriptionRequest{ diff --git a/core/internal/testutil/utils.go b/core/internal/testutil/utils.go index 12b8109aa..5f37dc6be 100644 --- a/core/internal/testutil/utils.go +++ b/core/internal/testutil/utils.go @@ -2,6 +2,7 @@ package testutil import ( "context" + "encoding/base64" "fmt" "os" "path/filepath" @@ -215,6 +216,22 @@ func GetLionBase64Image() (string, error) { return "data:image/png;base64," + string(data), nil } +// GetSampleAudioBase64 loads and returns the sample audio file as base64 encoded string +func GetSampleAudioBase64() (string, error) { + _, filename, _, ok := runtime.Caller(0) + if !ok { + return "", fmt.Errorf("failed to get current file path") + } + dir := filepath.Dir(filename) + filePath := filepath.Join(dir, "scenarios", "media", "sample.mp3") + + data, err := os.ReadFile(filePath) + if err != nil { + return "", err + } + return base64.StdEncoding.EncodeToString(data), nil +} + // CreateSpeechRequest creates a basic speech input for testing func CreateSpeechRequest(text, voice, format string) *schemas.BifrostSpeechRequest { return &schemas.BifrostSpeechRequest{ @@ -292,6 +309,25 @@ func CreateImageResponsesMessage(text, imageURL string) schemas.ResponsesMessage } } +func CreateAudioChatMessage(text, audioData string, audioFormat string) schemas.ChatMessage { + format := bifrost.Ptr(audioFormat) + return schemas.ChatMessage{ + Role: schemas.ChatMessageRoleUser, + Content: &schemas.ChatMessageContent{ + ContentBlocks: []schemas.ChatContentBlock{ + {Type: schemas.ChatContentBlockTypeText, Text: bifrost.Ptr(text)}, + { + Type: schemas.ChatContentBlockTypeInputAudio, + InputAudio: &schemas.ChatInputAudio{ + Data: audioData, + Format: format, + }, + }, + }, + }, + } +} + func CreateToolChatMessage(content string, toolCallID string) schemas.ChatMessage { return schemas.ChatMessage{ Role: schemas.ChatMessageRoleTool, @@ -627,7 +663,7 @@ func GetErrorMessage(err *schemas.BifrostError) string { } errorCode := "" - if err.Error != nil && err.Error.Code != nil && *err.Error.Code != "" { + if err.Error != nil && err.Error.Code != nil && *err.Error.Code != "" { errorCode = *err.Error.Code } diff --git a/core/internal/testutil/validation_presets.go b/core/internal/testutil/validation_presets.go index c096be72f..cd45eb331 100644 --- a/core/internal/testutil/validation_presets.go +++ b/core/internal/testutil/validation_presets.go @@ -3,7 +3,6 @@ package testutil import ( "regexp" - "github.com/maximhq/bifrost/core/schemas" ) @@ -200,6 +199,21 @@ func ReasoningExpectations() ResponseExpectations { } } +// ChatAudioExpectations returns validation expectations for chat audio scenarios +func ChatAudioExpectations() ResponseExpectations { + return ResponseExpectations{ + ShouldHaveContent: false, // Chat audio responses may have audio/transcript but not text content + ExpectedChoiceCount: 1, // Should have one choice with audio data + ShouldHaveUsageStats: true, + ShouldHaveTimestamps: true, + ShouldHaveModel: true, + ShouldHaveLatency: true, // Global expectation: latency should always be present + ProviderSpecific: map[string]interface{}{ + "response_type": "chat_audio", + }, + } +} + // ============================================================================= // SCENARIO-SPECIFIC EXPECTATION BUILDERS // ============================================================================= @@ -281,6 +295,9 @@ func GetExpectationsForScenario(scenarioName string, testConfig ComprehensiveTes expectations := ReasoningExpectations() return expectations + case "ChatAudio": + return ChatAudioExpectations() + case "ProviderSpecific": expectations := BasicChatExpectations() expectations.ShouldContainKeywords = []string{"unique", "specific", "capability"} diff --git a/core/providers/azure/azure_test.go b/core/providers/azure/azure_test.go index e51c503ab..a973879ed 100644 --- a/core/providers/azure/azure_test.go +++ b/core/providers/azure/azure_test.go @@ -24,15 +24,18 @@ func TestAzure(t *testing.T) { defer cancel() testConfig := testutil.ComprehensiveTestConfig{ - Provider: schemas.Azure, - ChatModel: "gpt-4o-backup", - VisionModel: "gpt-4o", + Provider: schemas.Azure, + ChatModel: "gpt-4o-backup", + VisionModel: "gpt-4o", + ChatAudioModel: "gpt-4o-mini-audio-preview", Fallbacks: []schemas.Fallback{ {Provider: schemas.Azure, Model: "gpt-4o-backup"}, }, - TextModel: "", // Azure doesn't support text completion in newer models - EmbeddingModel: "text-embedding-ada-002", - ReasoningModel: "claude-opus-4-5", + TextModel: "", // Azure doesn't support text completion in newer models + EmbeddingModel: "text-embedding-ada-002", + ReasoningModel: "claude-opus-4-5", + SpeechSynthesisModel: "gpt-4o-mini-tts", + TranscriptionModel: "whisper", Scenarios: testutil.TestScenarios{ TextCompletion: false, // Not supported SimpleChat: true, @@ -50,6 +53,11 @@ func TestAzure(t *testing.T) { Embedding: true, ListModels: true, Reasoning: true, + ChatAudio: true, + Transcription: true, + TranscriptionStream: false, // Not properly supported yet by Azure + SpeechSynthesis: true, + SpeechSynthesisStream: true, }, } diff --git a/core/providers/mistral/mistral.go b/core/providers/mistral/mistral.go index d5c221e4a..0be35657d 100644 --- a/core/providers/mistral/mistral.go +++ b/core/providers/mistral/mistral.go @@ -467,6 +467,11 @@ func (provider *MistralProvider) TranscriptionStream(ctx context.Context, postHo if currentEvent != "" && currentData != "" { chunkIndex++ provider.processTranscriptionStreamEvent(ctx, postHookRunner, currentEvent, currentData, request.Model, providerName, chunkIndex, startTime, &lastChunkTime, responseChan) + // Break the loop if this was a done event (check both possible event types) + eventType := MistralTranscriptionStreamEventType(currentEvent) + if eventType == MistralTranscriptionStreamEventDone || currentEvent == "transcript.text.done" { + break + } } // Reset for next event currentEvent = "" @@ -486,6 +491,7 @@ func (provider *MistralProvider) TranscriptionStream(ctx context.Context, postHo if currentEvent != "" && currentData != "" { chunkIndex++ provider.processTranscriptionStreamEvent(ctx, postHookRunner, currentEvent, currentData, request.Model, providerName, chunkIndex, startTime, &lastChunkTime, responseChan) + // Note: No need to break here as scanner.Scan() has already finished } // Handle scanner errors @@ -564,10 +570,12 @@ func (provider *MistralProvider) processTranscriptionStreamEvent( response.ExtraFields.RawResponse = jsonData } - // Check for done event - if MistralTranscriptionStreamEventType(eventType) == MistralTranscriptionStreamEventDone { + // Check for done event (handle both "transcription.done" and "transcript.text.done") + if MistralTranscriptionStreamEventType(eventType) == MistralTranscriptionStreamEventDone || eventType == "transcript.text.done" { response.ExtraFields.Latency = time.Since(startTime).Milliseconds() ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true) + // Ensure response type is set to Done + response.Type = schemas.TranscriptionStreamResponseTypeDone } providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, nil, response), responseChan) diff --git a/core/providers/mistral/mistral_test.go b/core/providers/mistral/mistral_test.go index 136c0c5bb..5ab9c8ade 100644 --- a/core/providers/mistral/mistral_test.go +++ b/core/providers/mistral/mistral_test.go @@ -28,9 +28,11 @@ func TestMistral(t *testing.T) { Fallbacks: []schemas.Fallback{ {Provider: schemas.Mistral, Model: "mistral-small-2503"}, }, - VisionModel: "pixtral-12b-latest", - EmbeddingModel: "codestral-embed", - TranscriptionModel: "voxtral-mini-latest", // Mistral's audio transcription model + VisionModel: "pixtral-12b-latest", + EmbeddingModel: "codestral-embed", + TranscriptionModel: "voxtral-mini-latest", // Mistral's audio transcription model + ExternalTTSProvider: schemas.OpenAI, + ExternalTTSModel: "gpt-4o-mini-tts", Scenarios: testutil.TestScenarios{ TextCompletion: false, // Not supported SimpleChat: true, @@ -47,8 +49,8 @@ func TestMistral(t *testing.T) { CompleteEnd2End: true, Embedding: true, Transcription: true, - TranscriptionStream: true, // Streaming transcription supported - ListModels: false, + TranscriptionStream: true, + ListModels: true, Reasoning: false, // Not supported right now because we are not using native mistral converters }, } diff --git a/core/providers/mistral/transcription_test.go b/core/providers/mistral/transcription_test.go index cc66da0f6..d1a2dbb68 100644 --- a/core/providers/mistral/transcription_test.go +++ b/core/providers/mistral/transcription_test.go @@ -980,6 +980,7 @@ func TestCreateMistralTranscriptionStreamMultipartBody(t *testing.T) { Model: "voxtral-mini-latest", File: []byte{0x01, 0x02, 0x03}, Language: schemas.Ptr("en"), + Stream: schemas.Ptr(true), }, expectedFields: map[string]string{ "stream": "true", @@ -996,6 +997,7 @@ func TestCreateMistralTranscriptionStreamMultipartBody(t *testing.T) { Prompt: schemas.Ptr("Test prompt"), ResponseFormat: schemas.Ptr("verbose_json"), Temperature: schemas.Ptr(0.5), + Stream: schemas.Ptr(true), TimestampGranularities: []string{"word", "segment"}, }, expectedFields: map[string]string{ diff --git a/core/providers/openai/openai.go b/core/providers/openai/openai.go index 31628e8cc..6085080da 100644 --- a/core/providers/openai/openai.go +++ b/core/providers/openai/openai.go @@ -1048,6 +1048,7 @@ func HandleOpenAIChatCompletionStreaming( if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil && (choice.ChatStreamResponseChoice.Delta.Content != nil || + choice.ChatStreamResponseChoice.Delta.Audio != nil || len(choice.ChatStreamResponseChoice.Delta.ToolCalls) > 0) { chunkIndex++ diff --git a/core/providers/openai/openai_test.go b/core/providers/openai/openai_test.go index 19077b809..0bedc6747 100644 --- a/core/providers/openai/openai_test.go +++ b/core/providers/openai/openai_test.go @@ -38,6 +38,7 @@ func TestOpenAI(t *testing.T) { }, SpeechSynthesisModel: "gpt-4o-mini-tts", ReasoningModel: "o1", + ChatAudioModel: "gpt-4o-mini-audio-preview", Scenarios: testutil.TestScenarios{ TextCompletion: true, TextCompletionStream: true, @@ -71,6 +72,7 @@ func TestOpenAI(t *testing.T) { FileDelete: true, FileContent: true, FileBatchInput: true, + ChatAudio: true, }, } diff --git a/core/schemas/chatcompletions.go b/core/schemas/chatcompletions.go index 9ec7c6193..c305cc059 100644 --- a/core/schemas/chatcompletions.go +++ b/core/schemas/chatcompletions.go @@ -154,30 +154,31 @@ func (cr *BifrostChatResponse) ToTextCompletionResponse() *BifrostTextCompletion // ChatParameters represents the parameters for a chat completion. type ChatParameters struct { - FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` // Penalizes frequent tokens - LogitBias *map[string]float64 `json:"logit_bias,omitempty"` // Bias for logit values - LogProbs *bool `json:"logprobs,omitempty"` // Number of logprobs to return - MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"` // Maximum number of tokens to generate - Metadata *map[string]any `json:"metadata,omitempty"` // Metadata to be returned with the response - Modalities []string `json:"modalities,omitempty"` // Modalities to be returned with the response - ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` - PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Penalizes repeated tokens - PromptCacheKey *string `json:"prompt_cache_key,omitempty"` // Prompt cache key - Reasoning *ChatReasoning `json:"reasoning,omitempty"` // Reasoning parameters - ResponseFormat *interface{} `json:"response_format,omitempty"` // Format for the response - SafetyIdentifier *string `json:"safety_identifier,omitempty"` // Safety identifier - Seed *int `json:"seed,omitempty"` - ServiceTier *string `json:"service_tier,omitempty"` - StreamOptions *ChatStreamOptions `json:"stream_options,omitempty"` - Stop []string `json:"stop,omitempty"` - Store *bool `json:"store,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopLogProbs *int `json:"top_logprobs,omitempty"` - TopP *float64 `json:"top_p,omitempty"` // Controls diversity via nucleus sampling - ToolChoice *ChatToolChoice `json:"tool_choice,omitempty"` // Whether to call a tool - Tools []ChatTool `json:"tools,omitempty"` // Tools to use - User *string `json:"user,omitempty"` // User identifier for tracking - Verbosity *string `json:"verbosity,omitempty"` // "low" | "medium" | "high" + Audio *ChatAudioParameters `json:"audio,omitempty"` // Audio parameters + FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` // Penalizes frequent tokens + LogitBias *map[string]float64 `json:"logit_bias,omitempty"` // Bias for logit values + LogProbs *bool `json:"logprobs,omitempty"` // Number of logprobs to return + MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"` // Maximum number of tokens to generate + Metadata *map[string]any `json:"metadata,omitempty"` // Metadata to be returned with the response + Modalities []string `json:"modalities,omitempty"` // Modalities to be returned with the response + ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` + PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Penalizes repeated tokens + PromptCacheKey *string `json:"prompt_cache_key,omitempty"` // Prompt cache key + Reasoning *ChatReasoning `json:"reasoning,omitempty"` // Reasoning parameters + ResponseFormat *interface{} `json:"response_format,omitempty"` // Format for the response + SafetyIdentifier *string `json:"safety_identifier,omitempty"` // Safety identifier + Seed *int `json:"seed,omitempty"` + ServiceTier *string `json:"service_tier,omitempty"` + StreamOptions *ChatStreamOptions `json:"stream_options,omitempty"` + Stop []string `json:"stop,omitempty"` + Store *bool `json:"store,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopLogProbs *int `json:"top_logprobs,omitempty"` + TopP *float64 `json:"top_p,omitempty"` // Controls diversity via nucleus sampling + ToolChoice *ChatToolChoice `json:"tool_choice,omitempty"` // Whether to call a tool + Tools []ChatTool `json:"tools,omitempty"` // Tools to use + User *string `json:"user,omitempty"` // User identifier for tracking + Verbosity *string `json:"verbosity,omitempty"` // "low" | "medium" | "high" // Dynamic parameters that can be provider-specific, they are directly // added to the request as is. @@ -220,6 +221,12 @@ func (cp *ChatParameters) UnmarshalJSON(data []byte) error { return nil } +// ChatAudioParameters represents the parameters for a chat audio completion. (Only supported by OpenAI Models that support audio input) +type ChatAudioParameters struct { + Format string `json:"format,omitempty"` // Format for the audio completion + Voice string `json:"voice,omitempty"` // Voice to use for the audio completion +} + // Not in OpenAI's spec, but needed to support extra parameters for reasoning. type ChatReasoning struct { Effort *string `json:"effort,omitempty"` // "none" | "minimal" | "low" | "medium" | "high" (any value other than "none" will enable reasoning) @@ -515,7 +522,7 @@ func (cm *ChatMessage) UnmarshalJSON(data []byte) error { // Only set if any field is populated if assistantMsg.Refusal != nil || assistantMsg.Reasoning != nil || len(assistantMsg.ReasoningDetails) > 0 || len(assistantMsg.Annotations) > 0 || - len(assistantMsg.ToolCalls) > 0 { + len(assistantMsg.ToolCalls) > 0 || assistantMsg.Audio != nil { cm.ChatAssistantMessage = &assistantMsg } @@ -641,6 +648,7 @@ type ChatToolMessage struct { // ChatAssistantMessage represents a message in a chat conversation. type ChatAssistantMessage struct { Refusal *string `json:"refusal,omitempty"` + Audio *ChatAudioMessageAudio `json:"audio,omitempty"` Reasoning *string `json:"reasoning,omitempty"` ReasoningDetails []ChatReasoningDetails `json:"reasoning_details,omitempty"` Annotations []ChatAssistantMessageAnnotation `json:"annotations,omitempty"` @@ -713,6 +721,14 @@ type ChatAssistantMessageToolCallFunction struct { Arguments string `json:"arguments"` // stringified json as retured by OpenAI, might not be a valid JSON always } +// ChatAudioMessageAudio represents audio data in a message. +type ChatAudioMessageAudio struct { + ID string `json:"id"` + Data string `json:"data"` + ExpiresAt int `json:"expires_at"` + Transcript string `json:"transcript"` +} + // BifrostResponseChoice represents a choice in the completion result. // This struct can represent either a streaming or non-streaming response choice. // IMPORTANT: Only one of TextCompletionResponseChoice, NonStreamResponseChoice or StreamResponseChoice @@ -773,6 +789,7 @@ type ChatStreamResponseChoiceDelta struct { Role *string `json:"role,omitempty"` // Only in the first chunk Content *string `json:"content,omitempty"` // May be empty string or null Refusal *string `json:"refusal,omitempty"` // Refusal content if any + Audio *ChatAudioMessageAudio `json:"audio,omitempty"` // Audio data if any Reasoning *string `json:"reasoning,omitempty"` // May be empty string or null ReasoningDetails []ChatReasoningDetails `json:"reasoning_details,omitempty"` ToolCalls []ChatAssistantMessageToolCall `json:"tool_calls,omitempty"` // If tool calls used (supports incremental updates) @@ -835,7 +852,9 @@ type BifrostLLMUsage struct { } type ChatPromptTokensDetails struct { + TextTokens int `json:"text_tokens,omitempty"` AudioTokens int `json:"audio_tokens,omitempty"` + ImageTokens int `json:"image_tokens,omitempty"` // For Providers which follow OpenAI's spec, CachedTokens means the number of input tokens read from the cache+input tokens used to create the cache entry. (because they do not differentiate between cache creation and cache read tokens) // For Providers which do not follow OpenAI's spec, CachedTokens means only the number of input tokens read from the cache. @@ -843,6 +862,7 @@ type ChatPromptTokensDetails struct { } type ChatCompletionTokensDetails struct { + TextTokens int `json:"text_tokens,omitempty"` AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"` AudioTokens int `json:"audio_tokens,omitempty"` CitationTokens *int `json:"citation_tokens,omitempty"` diff --git a/core/schemas/mux.go b/core/schemas/mux.go index d4b10da53..b43ba2f85 100644 --- a/core/schemas/mux.go +++ b/core/schemas/mux.go @@ -651,12 +651,15 @@ func (cu *BifrostLLMUsage) ToResponsesResponseUsage() *ResponsesResponseUsage { if cu.PromptTokensDetails != nil { usage.InputTokensDetails = &ResponsesResponseInputTokens{ + TextTokens: cu.PromptTokensDetails.TextTokens, AudioTokens: cu.PromptTokensDetails.AudioTokens, + ImageTokens: cu.PromptTokensDetails.ImageTokens, CachedTokens: cu.PromptTokensDetails.CachedTokens, } } if cu.CompletionTokensDetails != nil { usage.OutputTokensDetails = &ResponsesResponseOutputTokens{ + TextTokens: cu.CompletionTokensDetails.TextTokens, AcceptedPredictionTokens: cu.CompletionTokensDetails.AcceptedPredictionTokens, AudioTokens: cu.CompletionTokensDetails.AudioTokens, ReasoningTokens: cu.CompletionTokensDetails.ReasoningTokens, @@ -684,12 +687,15 @@ func (ru *ResponsesResponseUsage) ToBifrostLLMUsage() *BifrostLLMUsage { if ru.InputTokensDetails != nil { usage.PromptTokensDetails = &ChatPromptTokensDetails{ + TextTokens: ru.InputTokensDetails.TextTokens, AudioTokens: ru.InputTokensDetails.AudioTokens, + ImageTokens: ru.InputTokensDetails.ImageTokens, CachedTokens: ru.InputTokensDetails.CachedTokens, } } if ru.OutputTokensDetails != nil { usage.CompletionTokensDetails = &ChatCompletionTokensDetails{ + TextTokens: ru.OutputTokensDetails.TextTokens, AcceptedPredictionTokens: ru.OutputTokensDetails.AcceptedPredictionTokens, AudioTokens: ru.OutputTokensDetails.AudioTokens, ReasoningTokens: ru.OutputTokensDetails.ReasoningTokens, diff --git a/core/schemas/responses.go b/core/schemas/responses.go index cf0d83582..4e1de267c 100644 --- a/core/schemas/responses.go +++ b/core/schemas/responses.go @@ -265,14 +265,17 @@ type ResponsesResponseUsage struct { } type ResponsesResponseInputTokens struct { - AudioTokens int `json:"audio_tokens"` // Tokens for audio input + TextTokens int `json:"text_tokens,omitempty"` // Tokens for text input + AudioTokens int `json:"audio_tokens,omitempty"` // Tokens for audio input + ImageTokens int `json:"image_tokens,omitempty"` // Tokens for image input // For Providers which follow OpenAI's spec, CachedTokens means the number of input tokens read from the cache+input tokens used to create the cache entry. (because they do not differentiate between cache creation and cache read tokens) // For Providers which do not follow OpenAI's spec, CachedTokens means only the number of input tokens read from the cache. - CachedTokens int `json:"cached_tokens"` + CachedTokens int `json:"cached_tokens,omitempty"` } type ResponsesResponseOutputTokens struct { + TextTokens int `json:"text_tokens,omitempty"` AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"` AudioTokens int `json:"audio_tokens,omitempty"` ReasoningTokens int `json:"reasoning_tokens"` // Required for few OpenAI models diff --git a/core/version b/core/version index adf1ebc44..84da4213b 100644 --- a/core/version +++ b/core/version @@ -1 +1 @@ -1.2.38 \ No newline at end of file +1.2.39 \ No newline at end of file diff --git a/docs/contributing/setting-up-repo.mdx b/docs/contributing/setting-up-repo.mdx index 57f42b2df..b8b751d88 100644 --- a/docs/contributing/setting-up-repo.mdx +++ b/docs/contributing/setting-up-repo.mdx @@ -9,7 +9,7 @@ This guide walks you through setting up the Bifrost repository for local develop ## Prerequisites Before setting up the repository, ensure you have the following tools installed: -- [Go](https://go.dev/doc/install) (1.24.3) +- [Go](https://go.dev/doc/install) (1.25.5) - [Node.js](https://nodejs.org/en/download) (>= 18.0.0) and npm - [Make](/deployment-guides/how-to/install-make) - [Docker](https://www.docker.com) (optional, for containerized development) diff --git a/docs/plugins/building-dynamic-binary.mdx b/docs/plugins/building-dynamic-binary.mdx index fd0d895ad..a67151630 100644 --- a/docs/plugins/building-dynamic-binary.mdx +++ b/docs/plugins/building-dynamic-binary.mdx @@ -114,7 +114,7 @@ RUN npx next build RUN node scripts/fix-paths.js # --- Go Build Stage: Compile the Go binary --- -FROM golang:1.24.3-alpine3.22 AS builder +FROM golang:1.25.5-alpine3.22 AS builder WORKDIR /app # Install dependencies including gcc for CGO and sqlite @@ -230,7 +230,7 @@ RUN npx next build RUN node scripts/fix-paths.js # --- Go Build Stage: Compile the Go binary --- -FROM golang:1.24.3-bookworm AS builder +FROM golang:1.25.5-bookworm AS builder WORKDIR /app # Install dependencies including gcc for CGO and sqlite @@ -380,12 +380,12 @@ error while loading shared libraries: libc.musl-x86_64.so.1: cannot open shared | Target Deployment | Build With | Dockerfile Base | |-------------------|------------|-----------------| -| Alpine containers | musl | `golang:1.24.3-alpine3.22` | -| Debian/Ubuntu containers | glibc | `golang:1.24.3-bookworm` | -| Ubuntu/Debian servers | glibc | `golang:1.24.3-bookworm` | +| Alpine containers | musl | `golang:1.25.5-alpine3.22` | +| Debian/Ubuntu containers | glibc | `golang:1.25.5-bookworm` | +| Ubuntu/Debian servers | glibc | `golang:1.25.5-bookworm` | | RHEL/CentOS servers | glibc | Native build or glibc container | -| Kubernetes (Alpine) | musl | `golang:1.24.3-alpine3.22` | -| Kubernetes (Debian) | glibc | `golang:1.24.3-bookworm` | +| Kubernetes (Alpine) | musl | `golang:1.25.5-alpine3.22` | +| Kubernetes (Debian) | glibc | `golang:1.25.5-bookworm` | **Simple rule:** Build with the same base OS family as your deployment target. @@ -398,7 +398,7 @@ Plugins **must** be built with the **exact same environment** as your Bifrost bi docker run --rm \ -v "$PWD:/work" \ -w /work \ - golang:1.24.3-alpine3.22 \ + golang:1.25.5-alpine3.22 \ sh -c "apk add --no-cache gcc musl-dev && \ go build -buildmode=plugin -o myplugin.so main.go" @@ -406,7 +406,7 @@ docker run --rm \ docker run --rm \ -v "$PWD:/work" \ -w /work \ - golang:1.24.3-bookworm \ + golang:1.25.5-bookworm \ sh -c "apt-get update && apt-get install -y gcc && \ go build -buildmode=plugin -o myplugin.so main.go" ``` @@ -451,14 +451,14 @@ Test that your plugin loads successfully: ### Go Version Requirement -Bifrost is built with **Go 1.24.3**. Your plugin **must** be compiled with the exact same Go version to ensure compatibility. +Bifrost is built with **Go 1.25.5**. Your plugin **must** be compiled with the exact same Go version to ensure compatibility. ```bash # Check your Go version go version -# Should output: go version go1.24.3 ... +# Should output: go version go1.25.5 ... -# If you need to install Go 1.24.3 +# If you need to install Go 1.25.5 # Visit: https://go.dev/dl/ ``` @@ -531,12 +531,12 @@ When creating a plugin, your `go.mod` should match Bifrost's Go version: ```go module github.com/example/my-plugin -go 1.24.3 +go 1.25.5 require ( - github.com/maximhq/bifrost/core v1.2.26 + github.com/maximhq/bifrost/core v1.2.38 // Optional: Add framework for advanced features - // github.com/maximhq/bifrost/framework v1.1.33 + // github.com/maximhq/bifrost/framework v1.1.48 // Add other dependencies as needed, matching versions from Bifrost's go.mod files // github.com/bytedance/sonic v1.14.1 @@ -560,14 +560,14 @@ cannot load plugin: plugin was built with a different version of package runtime **Cause:** Plugin and Bifrost were built with different Go versions. -**Solution:** Use the exact same Go version (Go 1.24.3) for both: +**Solution:** Use the exact same Go version (Go 1.25.5) for both: ```bash # Check Go version used for Bifrost ./tmp/bifrost-http -version # Verify your Go version matches -go version # Should output: go version go1.24.3 +go version # Should output: go version go1.25.5 # See full compatibility requirements ``` @@ -639,10 +639,10 @@ Match Bifrost's exact Go version and key dependencies (see [Go Version and Packa ```bash # Pin Go version in Dockerfile -FROM golang:1.24.3-alpine3.22 AS builder +FROM golang:1.25.5-alpine3.22 AS builder # Pin Go version in Makefile/CI -GO_VERSION=1.24.3 +GO_VERSION=1.25.5 ``` ### 3. Test Plugin Loading Locally @@ -674,14 +674,14 @@ Build plugins in the same Dockerfile as Bifrost: ```dockerfile # Build plugin -FROM golang:1.24.3-alpine3.22 AS plugin-builder +FROM golang:1.25.5-alpine3.22 AS plugin-builder WORKDIR /plugin COPY plugins/myplugin/ . RUN apk add --no-cache gcc musl-dev && \ go build -buildmode=plugin -o myplugin.so main.go # Build Bifrost -FROM golang:1.24.3-alpine3.22 AS bifrost-builder +FROM golang:1.25.5-alpine3.22 AS bifrost-builder # ... (bifrost build steps) # Runtime diff --git a/docs/plugins/writing-plugin.mdx b/docs/plugins/writing-plugin.mdx index 5565d6e30..8eb71bf5c 100644 --- a/docs/plugins/writing-plugin.mdx +++ b/docs/plugins/writing-plugin.mdx @@ -12,12 +12,12 @@ This guide walks you through creating a custom plugin for Bifrost using our [hel Before you start, ensure you have: -- **Go 1.24+** installed (must match Bifrost's Go version) +- **Go 1.25.5** installed (must match Bifrost's Go version) - **Linux or macOS** (Go plugins are not supported on Windows) - **Bifrost** installed and configured - Basic understanding of Go programming -Make sure your go.mod has the go version pinned to 1.24.0 +Make sure your go.mod has the go version pinned to 1.25.5 ## Project Structure @@ -53,9 +53,9 @@ Your `go.mod` should look like this: ```go module github.com/yourusername/my-plugin -go 1.24.0 +go 1.25.5 -require github.com/maximhq/bifrost/core v1.2.17 +require github.com/maximhq/bifrost/core v1.2.38 ``` ## Step 2: Implement the Plugin Interface @@ -531,7 +531,7 @@ jobs: # 2. Setup Go - uses: actions/setup-go@v4 with: - go-version: '1.24' + go-version: '1.25.5' # 3. Build Bifrost - name: Build Bifrost diff --git a/examples/plugins/hello-world/Makefile b/examples/plugins/hello-world/Makefile index 0337bc5d3..bbfbbba98 100644 --- a/examples/plugins/hello-world/Makefile +++ b/examples/plugins/hello-world/Makefile @@ -127,7 +127,7 @@ _build-with-docker: # Internal target for Docker-based cross-compilation -e CGO_ENABLED=1 \ -e GOOS=$(TARGET_OS) \ -e GOARCH=$(TARGET_ARCH) \ - golang:1.24.3-alpine3.22 \ + golang:1.25.5-alpine3.22 \ sh -c "apk add --no-cache gcc musl-dev && \ go build -buildmode=plugin -ldflags='-w -s' -trimpath -o $(OUTPUT) main.go"; \ echo "$(COLOR_SUCCESS)✓ Plugin built successfully: $(OUTPUT) ($(TARGET_OS)/$(TARGET_ARCH))$(COLOR_RESET)"; \ diff --git a/framework/changelog.md b/framework/changelog.md index e69de29bb..39a30a3ff 100644 --- a/framework/changelog.md +++ b/framework/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 \ No newline at end of file diff --git a/framework/modelcatalog/pricing.go b/framework/modelcatalog/pricing.go index 49c80b5c5..a51ca4a2c 100644 --- a/framework/modelcatalog/pricing.go +++ b/framework/modelcatalog/pricing.go @@ -40,7 +40,15 @@ func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse) float64 { case result.EmbeddingResponse != nil && result.EmbeddingResponse.Usage != nil: usage = result.EmbeddingResponse.Usage case result.SpeechResponse != nil: - return 0 + if result.SpeechResponse.Usage != nil { + usage = &schemas.BifrostLLMUsage{ + PromptTokens: result.SpeechResponse.Usage.InputTokens, + CompletionTokens: result.SpeechResponse.Usage.OutputTokens, + TotalTokens: result.SpeechResponse.Usage.TotalTokens, + } + } else { + return 0 + } case result.SpeechStreamResponse != nil && result.SpeechStreamResponse.Usage != nil: usage = &schemas.BifrostLLMUsage{ PromptTokens: result.SpeechStreamResponse.Usage.InputTokens, @@ -65,6 +73,9 @@ func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse) float64 { audioTokenDetails.AudioTokens = result.TranscriptionResponse.Usage.InputTokenDetails.AudioTokens audioTokenDetails.TextTokens = result.TranscriptionResponse.Usage.InputTokenDetails.TextTokens } + if result.TranscriptionResponse.Usage.Seconds != nil { + audioSeconds = result.TranscriptionResponse.Usage.Seconds + } case result.TranscriptionStreamResponse != nil && result.TranscriptionStreamResponse.Usage != nil: usage = &schemas.BifrostLLMUsage{} if result.TranscriptionStreamResponse.Usage.InputTokens != nil { @@ -83,6 +94,9 @@ func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse) float64 { audioTokenDetails.AudioTokens = result.TranscriptionStreamResponse.Usage.InputTokenDetails.AudioTokens audioTokenDetails.TextTokens = result.TranscriptionStreamResponse.Usage.InputTokenDetails.TextTokens } + if result.TranscriptionStreamResponse.Usage.Seconds != nil { + audioSeconds = result.TranscriptionStreamResponse.Usage.Seconds + } default: return 0 } diff --git a/framework/streaming/chat.go b/framework/streaming/chat.go index 185a45019..36f3b9717 100644 --- a/framework/streaming/chat.go +++ b/framework/streaming/chat.go @@ -103,6 +103,36 @@ func (a *Accumulator) buildCompleteMessageFromChatStreamChunks(chunks []*ChatStr } } } + // Handle audio data - accumulate audio data and transcript + if chunk.Delta.Audio != nil { + if completeMessage.ChatAssistantMessage == nil { + completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{} + } + if completeMessage.ChatAssistantMessage.Audio == nil { + // First chunk with audio - initialize + completeMessage.ChatAssistantMessage.Audio = &schemas.ChatAudioMessageAudio{ + ID: chunk.Delta.Audio.ID, + Data: chunk.Delta.Audio.Data, + ExpiresAt: chunk.Delta.Audio.ExpiresAt, + Transcript: chunk.Delta.Audio.Transcript, + } + } else { + // Subsequent chunks - accumulate data and transcript + if chunk.Delta.Audio.Data != "" { + completeMessage.ChatAssistantMessage.Audio.Data += chunk.Delta.Audio.Data + } + if chunk.Delta.Audio.Transcript != "" { + completeMessage.ChatAssistantMessage.Audio.Transcript += chunk.Delta.Audio.Transcript + } + // Update ID and ExpiresAt if present (they should be consistent or final) + if chunk.Delta.Audio.ID != "" { + completeMessage.ChatAssistantMessage.Audio.ID = chunk.Delta.Audio.ID + } + if chunk.Delta.Audio.ExpiresAt != 0 { + completeMessage.ChatAssistantMessage.Audio.ExpiresAt = chunk.Delta.Audio.ExpiresAt + } + } + } // Accumulate tool calls if len(chunk.Delta.ToolCalls) > 0 { a.accumulateToolCallsInMessage(completeMessage, chunk.Delta.ToolCalls) diff --git a/framework/version b/framework/version index 3361394de..b1471d3a9 100644 --- a/framework/version +++ b/framework/version @@ -1 +1 @@ -1.1.48 \ No newline at end of file +1.1.49 \ No newline at end of file diff --git a/helm-charts/bifrost/Chart.yaml b/helm-charts/bifrost/Chart.yaml index e3533ea0b..2ecb610f1 100644 --- a/helm-charts/bifrost/Chart.yaml +++ b/helm-charts/bifrost/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: bifrost description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers type: application -version: 1.5.0 -appVersion: "1.5.0" +version: 1.5.1 +appVersion: "1.5.1" keywords: - ai - gateway diff --git a/helm-charts/index.yaml b/helm-charts/index.yaml index b1489317e..74c1004d7 100644 --- a/helm-charts/index.yaml +++ b/helm-charts/index.yaml @@ -1,6 +1,28 @@ apiVersion: v1 entries: bifrost: + - apiVersion: v2 + appVersion: 1.5.1 + created: "2025-12-12T12:00:00.000000+00:00" + description: A Helm chart for deploying Bifrost - AI Gateway with unified interface + for multiple providers + digest: "" + home: https://www.getmaxim.ai/bifrost + icon: https://www.getmaxim.ai/bifrost/bifrost-logo-only.png + keywords: + - ai + - gateway + - llm + maintainers: + - email: akshay@getmaxim.ai + name: Bifrost Team + name: bifrost + sources: + - https://github.com/maximhq/bifrost + type: application + urls: + - https://maximhq.github.io/bifrost/helm-charts/bifrost-1.5.1.tgz + version: 1.5.1 - apiVersion: v2 appVersion: 1.5.0 created: "2025-12-11T12:00:00.000000+00:00" diff --git a/plugins/governance/changelog.md b/plugins/governance/changelog.md index e69de29bb..5ef197d42 100644 --- a/plugins/governance/changelog.md +++ b/plugins/governance/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 and framework version to 1.1.49 \ No newline at end of file diff --git a/plugins/governance/version b/plugins/governance/version index 8229aaebc..16e50f0b4 100644 --- a/plugins/governance/version +++ b/plugins/governance/version @@ -1 +1 @@ -1.3.49 \ No newline at end of file +1.3.50 \ No newline at end of file diff --git a/plugins/jsonparser/changelog.md b/plugins/jsonparser/changelog.md index e69de29bb..5ef197d42 100644 --- a/plugins/jsonparser/changelog.md +++ b/plugins/jsonparser/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 and framework version to 1.1.49 \ No newline at end of file diff --git a/plugins/jsonparser/version b/plugins/jsonparser/version index 8229aaebc..16e50f0b4 100644 --- a/plugins/jsonparser/version +++ b/plugins/jsonparser/version @@ -1 +1 @@ -1.3.49 \ No newline at end of file +1.3.50 \ No newline at end of file diff --git a/plugins/logging/changelog.md b/plugins/logging/changelog.md index e69de29bb..5ef197d42 100644 --- a/plugins/logging/changelog.md +++ b/plugins/logging/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 and framework version to 1.1.49 \ No newline at end of file diff --git a/plugins/logging/version b/plugins/logging/version index 8229aaebc..16e50f0b4 100644 --- a/plugins/logging/version +++ b/plugins/logging/version @@ -1 +1 @@ -1.3.49 \ No newline at end of file +1.3.50 \ No newline at end of file diff --git a/plugins/maxim/changelog.md b/plugins/maxim/changelog.md index e69de29bb..5ef197d42 100644 --- a/plugins/maxim/changelog.md +++ b/plugins/maxim/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 and framework version to 1.1.49 \ No newline at end of file diff --git a/plugins/maxim/version b/plugins/maxim/version index 0750769ee..aa8e212a6 100644 --- a/plugins/maxim/version +++ b/plugins/maxim/version @@ -1 +1 @@ -1.4.49 \ No newline at end of file +1.4.50 \ No newline at end of file diff --git a/plugins/mocker/changelog.md b/plugins/mocker/changelog.md index e69de29bb..5ef197d42 100644 --- a/plugins/mocker/changelog.md +++ b/plugins/mocker/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 and framework version to 1.1.49 \ No newline at end of file diff --git a/plugins/mocker/version b/plugins/mocker/version index bba60c093..8229aaebc 100644 --- a/plugins/mocker/version +++ b/plugins/mocker/version @@ -1 +1 @@ -1.3.48 \ No newline at end of file +1.3.49 \ No newline at end of file diff --git a/plugins/otel/changelog.md b/plugins/otel/changelog.md index e69de29bb..5ef197d42 100644 --- a/plugins/otel/changelog.md +++ b/plugins/otel/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 and framework version to 1.1.49 \ No newline at end of file diff --git a/plugins/otel/version b/plugins/otel/version index c30197954..feca5b25f 100644 --- a/plugins/otel/version +++ b/plugins/otel/version @@ -1 +1 @@ -1.0.48 \ No newline at end of file +1.0.49 \ No newline at end of file diff --git a/plugins/semanticcache/changelog.md b/plugins/semanticcache/changelog.md index e69de29bb..5ef197d42 100644 --- a/plugins/semanticcache/changelog.md +++ b/plugins/semanticcache/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 and framework version to 1.1.49 \ No newline at end of file diff --git a/plugins/semanticcache/version b/plugins/semanticcache/version index bba60c093..8229aaebc 100644 --- a/plugins/semanticcache/version +++ b/plugins/semanticcache/version @@ -1 +1 @@ -1.3.48 \ No newline at end of file +1.3.49 \ No newline at end of file diff --git a/plugins/telemetry/changelog.md b/plugins/telemetry/changelog.md index e69de29bb..5ef197d42 100644 --- a/plugins/telemetry/changelog.md +++ b/plugins/telemetry/changelog.md @@ -0,0 +1 @@ +- chore: update core version to 1.2.39 and framework version to 1.1.49 \ No newline at end of file diff --git a/plugins/telemetry/version b/plugins/telemetry/version index bba60c093..8229aaebc 100644 --- a/plugins/telemetry/version +++ b/plugins/telemetry/version @@ -1 +1 @@ -1.3.48 \ No newline at end of file +1.3.49 \ No newline at end of file diff --git a/transports/Dockerfile b/transports/Dockerfile index 3e9a1f24c..4224d5fc6 100644 --- a/transports/Dockerfile +++ b/transports/Dockerfile @@ -15,7 +15,7 @@ RUN node scripts/fix-paths.js # Skip the copy-build step since we'll copy the files in the Go build stage # --- Go Build Stage: Compile the Go binary --- -FROM golang:1.24.3-alpine3.22 AS builder +FROM golang:1.25.5-alpine3.22 AS builder WORKDIR /app # Install dependencies including gcc for CGO and sqlite diff --git a/transports/changelog.md b/transports/changelog.md index f5e4406b5..d685d32d0 100644 --- a/transports/changelog.md +++ b/transports/changelog.md @@ -6,6 +6,7 @@ - chore: increased provider-level timeout limit to 48 hours - chore: bumped up Go version to 1.25.5 - docs: updates key management links for integration docs - [@Georgehe4](https://github.com/Georgehe4) +- chore: update core version to 1.2.39 and framework version to 1.1.49 - feat: prompt caching support for anthropic and bedrock(claude and nova models) - feat: reasoning support for bedrock nova 2 models - docs: updated langchain docs for reasoning and embedding \ No newline at end of file diff --git a/transports/version b/transports/version index 8229aaebc..16e50f0b4 100644 --- a/transports/version +++ b/transports/version @@ -1 +1 @@ -1.3.49 \ No newline at end of file +1.3.50 \ No newline at end of file diff --git a/ui/app/workspace/logs/views/audioPlayer.tsx b/ui/app/workspace/logs/views/audioPlayer.tsx index 82ab60d38..ec7215df9 100644 --- a/ui/app/workspace/logs/views/audioPlayer.tsx +++ b/ui/app/workspace/logs/views/audioPlayer.tsx @@ -2,15 +2,87 @@ import { Button } from "@/components/ui/button"; import { Pause, Play, Download } from "lucide-react"; import { useState } from "react"; -const AudioPlayer = ({ src }: { src: string }) => { +interface AudioPlayerProps { + src: string; + format?: string; // Optional format: "mp3", "wav", "pcm16", etc. +} + +const AudioPlayer = ({ src, format }: AudioPlayerProps) => { const [isPlaying, setIsPlaying] = useState(false); const [audio] = useState(typeof window !== "undefined" ? new Audio() : null); const [error, setError] = useState(null); - const createAudioBlob = (base64Data: string): Blob | null => { + // Convert PCM16 to WAV format + const convertPCM16ToWAV = (pcmData: Uint8Array, sampleRate: number = 24000, numChannels: number = 1): Uint8Array => { + const bitsPerSample = 16; + const byteRate = (sampleRate * numChannels * bitsPerSample) / 8; + const blockAlign = (numChannels * bitsPerSample) / 8; + const dataSize = pcmData.length; + const fileSize = 36 + dataSize; + + const wavBuffer = new ArrayBuffer(44 + dataSize); + const view = new DataView(wavBuffer); + + // RIFF header + const writeString = (offset: number, string: string) => { + for (let i = 0; i < string.length; i++) { + view.setUint8(offset + i, string.charCodeAt(i)); + } + }; + + writeString(0, "RIFF"); + view.setUint32(4, fileSize, true); + writeString(8, "WAVE"); + + // fmt subchunk + writeString(12, "fmt "); + view.setUint32(16, 16, true); // Subchunk1Size + view.setUint16(20, 1, true); // AudioFormat (1 = PCM) + view.setUint16(22, numChannels, true); // NumChannels + view.setUint32(24, sampleRate, true); // SampleRate + view.setUint32(28, byteRate, true); // ByteRate + view.setUint16(32, blockAlign, true); // BlockAlign + view.setUint16(34, bitsPerSample, true); // BitsPerSample + + // data subchunk + writeString(36, "data"); + view.setUint32(40, dataSize, true); + + // Copy PCM data + const wavArray = new Uint8Array(wavBuffer); + wavArray.set(pcmData, 44); + + return wavArray; + }; + + const createAudioBlob = (base64Data: string, audioFormat?: string): Blob | null => { try { - return new Blob([Uint8Array.from(atob(base64Data), (c) => c.charCodeAt(0))], { - type: "audio/mpeg", + const binaryString = atob(base64Data); + const pcmData = Uint8Array.from(binaryString, (c) => c.charCodeAt(0)); + + // Handle PCM16 format - convert to WAV + if (audioFormat === "pcm16" || audioFormat === "pcm_s16le_16") { + const wavData = convertPCM16ToWAV(pcmData); + // Create a new ArrayBuffer to ensure proper type + const buffer = new ArrayBuffer(wavData.length); + new Uint8Array(buffer).set(wavData); + return new Blob([buffer], { + type: "audio/wav", + }); + } + + // Handle other formats + let mimeType = "audio/mpeg"; // Default to MP3 + if (audioFormat === "wav") { + mimeType = "audio/wav"; + } else if (audioFormat === "ogg") { + mimeType = "audio/ogg"; + } else if (audioFormat === "webm") { + mimeType = "audio/webm"; + } + + return new Blob([pcmData], { + type: mimeType, }); } catch (err) { console.error("Failed to decode audio data:", err); @@ -26,7 +98,7 @@ const AudioPlayer = ({ src }: { src: string }) => { audio.pause(); setIsPlaying(false); } else { - const audioBlob = createAudioBlob(src); + const audioBlob = createAudioBlob(src, format); if (!audioBlob) return; const audioUrl = URL.createObjectURL(audioBlob); @@ -48,14 +120,26 @@ const AudioPlayer = ({ src }: { src: string }) => { const handleDownload = () => { if (!src) return; - const audioBlob = createAudioBlob(src); + const audioBlob = createAudioBlob(src, format); if (!audioBlob) return; const audioUrl = URL.createObjectURL(audioBlob); + // Determine file extension based on format + let extension = "mp3"; + if (format === "pcm16" || format === "pcm_s16le_16") { + extension = "wav"; + } else if (format === "wav") { + extension = "wav"; + } else if (format === "ogg") { + extension = "ogg"; + } else if (format === "webm") { + extension = "webm"; + } + const a = document.createElement("a"); a.href = audioUrl; - a.download = "speech-output.mp3"; + a.download = `speech-output.${extension}`; document.body.appendChild(a); a.click(); document.body.removeChild(a); diff --git a/ui/app/workspace/logs/views/filters.tsx b/ui/app/workspace/logs/views/filters.tsx index 302f0c538..7f5ff0c6b 100644 --- a/ui/app/workspace/logs/views/filters.tsx +++ b/ui/app/workspace/logs/views/filters.tsx @@ -324,10 +324,10 @@ export function LogFilters({ filters, onFiltersChange, liveEnabled, onLiveToggle - + - +
Recalculate costs diff --git a/ui/app/workspace/logs/views/logChatMessageView.tsx b/ui/app/workspace/logs/views/logChatMessageView.tsx index d7424ff3d..5b0c821cf 100644 --- a/ui/app/workspace/logs/views/logChatMessageView.tsx +++ b/ui/app/workspace/logs/views/logChatMessageView.tsx @@ -1,9 +1,11 @@ import { ChatMessage, ContentBlock } from "@/lib/types/logs"; import { CodeEditor } from "./codeEditor"; import { isJson, cleanJson } from "@/lib/utils/validation"; +import AudioPlayer from "./audioPlayer"; interface LogChatMessageViewProps { message: ChatMessage; + audioFormat?: string; // Optional audio format from request params } const renderContentBlock = (block: ContentBlock, index: number) => { @@ -62,7 +64,7 @@ const renderContentBlock = (block: ContentBlock, index: number) => { ); }; -export default function LogChatMessageView({ message }: LogChatMessageViewProps) { +export default function LogChatMessageView({ message, audioFormat }: LogChatMessageViewProps) { return (
@@ -178,6 +180,35 @@ export default function LogChatMessageView({ message }: LogChatMessageViewProps) />
)} + + {/* Handle audio output */} + {message.audio && ( +
+
Audio Output
+
+ {message.audio.transcript && ( +
+
Transcript:
+
{message.audio.transcript}
+
+ )} + {message.audio.data && ( +
+
Audio:
+ +
+ )} + {message.audio.id && ( +
+ ID: {message.audio.id} | Expires:{" "} + {message.audio.expires_at && Number.isFinite(message.audio.expires_at) + ? new Date(message.audio.expires_at * 1000).toLocaleString() + : "N/A"} +
+ )} +
+
+ )}
); } diff --git a/ui/app/workspace/logs/views/logDetailsSheet.tsx b/ui/app/workspace/logs/views/logDetailsSheet.tsx index 084c22a4b..57f673e5f 100644 --- a/ui/app/workspace/logs/views/logDetailsSheet.tsx +++ b/ui/app/workspace/logs/views/logDetailsSheet.tsx @@ -34,6 +34,10 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet } catch (ignored) {} } + // Extract audio format from request params + // Format can be in params.audio?.format or params.extra_params?.audio?.format + const audioFormat = (log.params as any)?.audio?.format || (log.params as any)?.extra_params?.audio?.format || undefined; + return ( @@ -115,10 +119,22 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet {log.fallback_index > 0 && } {log.virtual_key && } + {/* Display audio params if present */} + {(log.params as any)?.audio && ( + <> + {(log.params as any).audio.format && ( + + )} + {(log.params as any).audio.voice && ( + + )} + + )} + {log.params && Object.keys(log.params).length > 0 && Object.entries(log.params) - .filter(([key]) => key !== "tools" && key !== "instructions") + .filter(([key]) => key !== "tools" && key !== "instructions" && key !== "audio") .filter(([_, value]) => typeof value === "boolean" || typeof value === "number" || typeof value === "string") .map(([key, value]) => )}
@@ -360,7 +376,7 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet <>
Conversation History
{log.input_history.slice(0, -1).map((message, index) => ( - + ))} )} @@ -369,7 +385,7 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet {log.input_history && log.input_history.length > 0 && ( <>
Input
- + )} @@ -388,7 +404,7 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet
Response
- + )} {log.responses_output && log.responses_output.length > 0 && !log.error_details?.error.message && ( diff --git a/ui/lib/constants/icons.tsx b/ui/lib/constants/icons.tsx index c85d9cbe5..56b97e9f6 100644 --- a/ui/lib/constants/icons.tsx +++ b/ui/lib/constants/icons.tsx @@ -592,52 +592,7 @@ export const ProviderIcons = { ); }, nebius: ({ size = "md", className = "" }: IconProps) => { - const resolvedSize = resolveSize(size); - return ( - - Nebius Token Factory - - - - - - - - - - - - - - - - ); + return Nebius; }, } as const; diff --git a/ui/lib/types/logs.ts b/ui/lib/types/logs.ts index e73a6469b..35d9a0b6d 100644 --- a/ui/lib/types/logs.ts +++ b/ui/lib/types/logs.ts @@ -114,6 +114,14 @@ export interface ChatMessage { tool_calls?: ToolCall[]; // For backward compatibility, tool calls are now in the content reasoning?: string; reasoning_details?: ReasoningDetails[]; + audio?: ChatAudioMessageAudio; +} + +export interface ChatAudioMessageAudio { + id: string; + data: string; + expires_at: number; + transcript: string; } export interface ReasoningDetails {