diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml
index fcddd24f0..38ad480b7 100644
--- a/.github/workflows/pr-tests.yml
+++ b/.github/workflows/pr-tests.yml
@@ -64,7 +64,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.24.3"
+          go-version: "1.25.5"
 
       - name: Set up Python
         uses: actions/setup-python@v5
diff --git a/.github/workflows/release-pipeline.yml b/.github/workflows/release-pipeline.yml
index b89e7e729..659392f48 100644
--- a/.github/workflows/release-pipeline.yml
+++ b/.github/workflows/release-pipeline.yml
@@ -81,7 +81,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.24.3"
+          go-version: "1.25.5"
       - name: Configure Git
         run: |
           git config user.name "GitHub Actions Bot"
@@ -139,7 +139,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.24.3"
+          go-version: "1.25.5"
 
       - name: Configure Git
         run: |
@@ -215,7 +215,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.24.3"
+          go-version: "1.25.5"
 
       - name: Configure Git
         run: |
@@ -294,7 +294,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.24.3"
+          go-version: "1.25.5"
 
       - name: Set up Node.js
         uses: actions/setup-node@v4
diff --git a/.github/workflows/snyk.yml b/.github/workflows/snyk.yml
index 1cd876a39..fe2111bd4 100644
--- a/.github/workflows/snyk.yml
+++ b/.github/workflows/snyk.yml
@@ -2,9 +2,9 @@ name: Snyk checks
 
 on:
   push:
-    branches: [main, master, '**/*']
+    branches: [main, master, "**/*"]
   pull_request:
-    branches: ['**/*']
+    branches: ["**/*"]
   workflow_dispatch:
 
 permissions:
@@ -44,17 +44,17 @@ jobs:
       - name: Setup Node (for UI)
         uses: actions/setup-node@v4
         with:
-          node-version: '20'
+          node-version: "20"
 
       - name: Setup Python (for tests tooling)
         uses: actions/setup-python@v5
         with:
-          python-version: '3.11'
+          python-version: "3.11"
 
       - name: Setup Go
         uses: actions/setup-go@v5
         with:
-          go-version: '1.24.3'
+          go-version: "1.25.5"
 
       - name: Install Snyk CLI
         uses: snyk/actions/setup@master
@@ -82,22 +82,22 @@ jobs:
       - name: Setup Node (for UI)
         uses: actions/setup-node@v4
         with:
-          node-version: '20'
+          node-version: "20"
 
       - name: Setup Python (for tests tooling)
         uses: actions/setup-python@v5
         with:
-          python-version: '3.11'
+          python-version: "3.11"
 
       - name: Setup Python (for tests tooling)
         uses: actions/setup-python@v5
         with:
-          python-version: '3.11'
-          cache: 'pip'
+          python-version: "3.11"
+          cache: "pip"
           cache-dependency-path: |
             tests/integrations/requirements.txt
             tests/governance/requirements.txt
-      
+
       - name: Install Python dependencies (tests tooling)
         run: |
           python -m pip install --disable-pip-version-check \
@@ -107,7 +107,7 @@ jobs:
       - name: Setup Go
         uses: actions/setup-go@v5
         with:
-          go-version: '1.24.3'
+          go-version: "1.25.5"
 
       - name: Build
         run: make build
diff --git a/Makefile b/Makefile
index 1b218f15a..c56679d40 100644
--- a/Makefile
+++ b/Makefile
@@ -213,7 +213,7 @@ _build-with-docker: # Internal target for Docker-based cross-compilation
 				-e GOOS=$(TARGET_OS) \
 				-e GOARCH=$(TARGET_ARCH) \
 				 $(if $(LOCAL),,-e GOWORK=off) \
-				golang:1.24.3-alpine3.22 \
+				golang:1.25.5-alpine3.22 \
 				sh -c "apk add --no-cache gcc musl-dev && \
 				go build \
 					-ldflags='-w -s -X main.Version=v$(VERSION)' \
@@ -230,7 +230,7 @@ _build-with-docker: # Internal target for Docker-based cross-compilation
 				-e GOOS=$(TARGET_OS) \
 				-e GOARCH=$(TARGET_ARCH) \
 				 $(if $(LOCAL),,-e GOWORK=off) \
-				golang:1.24.3-alpine3.22 \
+				golang:1.25.5-alpine3.22 \
 				sh -c "apk add --no-cache gcc musl-dev && \
 				go build \
 					-ldflags='-w -s -extldflags "-static" -X main.Version=v$(VERSION)' \
diff --git a/core/internal/testutil/account.go b/core/internal/testutil/account.go
index 7bfd3435b..a39ba3dba 100644
--- a/core/internal/testutil/account.go
+++ b/core/internal/testutil/account.go
@@ -53,6 +53,7 @@ type TestScenarios struct {
 	FileDelete            bool // File API delete functionality
 	FileContent           bool // File API content download functionality
 	FileBatchInput        bool // Whether batch create supports file-based input (InputFileID)
+	ChatAudio             bool // Chat completion with audio input/output functionality
 }
 
 // ComprehensiveTestConfig extends TestConfig with additional scenarios
@@ -66,6 +67,7 @@ type ComprehensiveTestConfig struct {
 	EmbeddingModel           string
 	TranscriptionModel       string
 	SpeechSynthesisModel     string
+	ChatAudioModel           string
 	Scenarios                TestScenarios
 	Fallbacks                []schemas.Fallback     // for chat, responses, image and reasoning tests
 	TextCompletionFallbacks  []schemas.Fallback     // for text completion tests
@@ -73,6 +75,8 @@ type ComprehensiveTestConfig struct {
 	SpeechSynthesisFallbacks []schemas.Fallback     // for speech synthesis tests
 	EmbeddingFallbacks       []schemas.Fallback     // for embedding tests
 	SkipReason               string                 // Reason to skip certain tests
+	ExternalTTSProvider      schemas.ModelProvider  // External TTS provider to use for testing
+	ExternalTTSModel         string                 // External TTS model to use for testing
 	BatchExtraParams         map[string]interface{} // Extra params for batch operations (e.g., role_arn, output_s3_uri for Bedrock)
 	FileExtraParams          map[string]interface{} // Extra params for file operations (e.g., s3_bucket for Bedrock)
 }
@@ -161,25 +165,25 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx *context.Context
 					},
 				},
 			},
-		{
-			Models: []string{},
-			Weight: 1.0,
-			BedrockKeyConfig: &schemas.BedrockKeyConfig{
-				AccessKey:    os.Getenv("AWS_ACCESS_KEY_ID"),
-				SecretKey:    os.Getenv("AWS_SECRET_ACCESS_KEY"),
-				SessionToken: bifrost.Ptr(os.Getenv("AWS_SESSION_TOKEN")),
-				Region:       bifrost.Ptr(getEnvWithDefault("AWS_REGION", "us-east-1")),
-				ARN:          bifrost.Ptr(os.Getenv("AWS_BEDROCK_ARN")),
-				Deployments: map[string]string{
-					"claude-3.5-sonnet": "anthropic.claude-3-5-sonnet-20240620-v1:0",
-					"claude-3.7-sonnet": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
-					"claude-4-sonnet":   "global.anthropic.claude-sonnet-4-20250514-v1:0",
-					"claude-4.5-sonnet": "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
-					"claude-4.5-haiku":  "global.anthropic.claude-haiku-4-5-20251001-v1:0",
+			{
+				Models: []string{},
+				Weight: 1.0,
+				BedrockKeyConfig: &schemas.BedrockKeyConfig{
+					AccessKey:    os.Getenv("AWS_ACCESS_KEY_ID"),
+					SecretKey:    os.Getenv("AWS_SECRET_ACCESS_KEY"),
+					SessionToken: bifrost.Ptr(os.Getenv("AWS_SESSION_TOKEN")),
+					Region:       bifrost.Ptr(getEnvWithDefault("AWS_REGION", "us-east-1")),
+					ARN:          bifrost.Ptr(os.Getenv("AWS_BEDROCK_ARN")),
+					Deployments: map[string]string{
+						"claude-3.5-sonnet": "anthropic.claude-3-5-sonnet-20240620-v1:0",
+						"claude-3.7-sonnet": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+						"claude-4-sonnet":   "global.anthropic.claude-sonnet-4-20250514-v1:0",
+						"claude-4.5-sonnet": "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
+						"claude-4.5-haiku":  "global.anthropic.claude-haiku-4-5-20251001-v1:0",
+					},
 				},
+				UseForBatchAPI: bifrost.Ptr(true),
 			},
-			UseForBatchAPI: bifrost.Ptr(true),
-		},
 			{
 				Models: []string{"cohere.embed-v4:0"},
 				Weight: 1.0,
@@ -218,6 +222,20 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx *context.Context
 				},
 				UseForBatchAPI: bifrost.Ptr(true),
 			},
+			{
+				Value:  os.Getenv("AZURE_API_KEY"),
+				Models: []string{},
+				Weight: 1.0,
+				AzureKeyConfig: &schemas.AzureKeyConfig{
+					Endpoint:   os.Getenv("AZURE_ENDPOINT"),
+					APIVersion: bifrost.Ptr("2025-01-01-preview"),
+					Deployments: map[string]string{
+						"whisper":                   "whisper",
+						"gpt-4o-mini-tts":           "gpt-4o-mini-tts",
+						"gpt-4o-mini-audio-preview": "gpt-4o-mini-audio-preview",
+					},
+				},
+			},
 		}, nil
 	case schemas.Vertex:
 		return []schemas.Key{
@@ -587,6 +605,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
 		PromptCachingModel:   "gpt-4.1",
 		TranscriptionModel:   "whisper-1",
 		SpeechSynthesisModel: "tts-1",
+		ChatAudioModel:       "gpt-4o-mini-audio-preview",
 		Scenarios: TestScenarios{
 			TextCompletion:        false, // Not supported
 			TextCompletionStream:  false, // Not supported
@@ -618,6 +637,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
 			FileRetrieve:          true, // OpenAI supports file API
 			FileDelete:            true, // OpenAI supports file API
 			FileContent:           true, // OpenAI supports file API
+			ChatAudio:             true, // OpenAI supports chat audio
 		},
 		Fallbacks: []schemas.Fallback{
 			{Provider: schemas.Anthropic, Model: "claude-3-7-sonnet-20250219"},
@@ -725,9 +745,12 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
 		},
 	},
 	{
-		Provider:  schemas.Azure,
-		ChatModel: "gpt-4o",
-		TextModel: "", // Azure doesn't support text completion in newer models
+		Provider:             schemas.Azure,
+		ChatModel:            "gpt-4o",
+		TextModel:            "", // Azure doesn't support text completion in newer models
+		ChatAudioModel:       "gpt-4o-mini-audio-preview",
+		TranscriptionModel:   "whisper-1",
+		SpeechSynthesisModel: "gpt-4o-mini-tts",
 		Scenarios: TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
@@ -741,10 +764,10 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
 			ImageBase64:           true,
 			MultipleImages:        true,
 			CompleteEnd2End:       true,
-			SpeechSynthesis:       false, // Not supported yet
-			SpeechSynthesisStream: false, // Not supported yet
-			Transcription:         false, // Not supported yet
-			TranscriptionStream:   false, // Not supported yet
+			SpeechSynthesis:       true,  // Supported via gpt-4o-mini-tts
+			SpeechSynthesisStream: true,  // Supported via gpt-4o-mini-tts
+			Transcription:         true,  // Supported via whisper-1
+			TranscriptionStream:   false, // Not properly supported yet by Azure
 			Embedding:             true,
 			ListModels:            true,
 			BatchCreate:           true, // Azure supports batch API
@@ -757,6 +780,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
 			FileRetrieve:          true, // Azure supports file API
 			FileDelete:            true, // Azure supports file API
 			FileContent:           true, // Azure supports file API
+			ChatAudio:             true, // Azure supports chat audio
 		},
 		Fallbacks: []schemas.Fallback{
 			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
diff --git a/core/internal/testutil/chat_audio.go b/core/internal/testutil/chat_audio.go
new file mode 100644
index 000000000..98cc6de05
--- /dev/null
+++ b/core/internal/testutil/chat_audio.go
@@ -0,0 +1,318 @@
+package testutil
+
+import (
+	"context"
+	"os"
+	"strings"
+	"testing"
+
+	bifrost "github.com/maximhq/bifrost/core"
+	"github.com/maximhq/bifrost/core/schemas"
+)
+
+// RunChatAudioTest runs the non-streaming chat audio scenario. It sends the bundled sample
+// clip to testConfig.ChatAudioModel through the Chat Completions API, requesting text and
+// audio output, and fails the subtest unless the first choice carries an assistant message
+// whose audio has both data and a transcript. It logs and returns without running the
+// subtest when the scenario is disabled or no chat audio model is configured.
+func RunChatAudioTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
+	if testConfig.ChatAudioModel == "" || !testConfig.Scenarios.ChatAudio {
+		t.Logf("Chat audio not supported for provider %s", testConfig.Provider)
+		return
+	}
+
+	t.Run("ChatAudio", func(t *testing.T) {
+		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+			t.Parallel()
+		}
+
+		// The input is a single chat message built from the base64-encoded sample clip.
+		sampleAudio, loadErr := GetSampleAudioBase64()
+		if loadErr != nil {
+			t.Fatalf("Failed to load sample audio file: %v", loadErr)
+		}
+		prompt := CreateAudioChatMessage("Describe in detail the spoken audio input.", sampleAudio, "mp3")
+		messages := []schemas.ChatMessage{prompt}
+
+		// Copy the scenario's retry timing and callbacks; no chat-specific conditions are added.
+		base := GetTestRetryConfigForScenario("ChatAudio", testConfig)
+		chatRetryConfig := ChatRetryConfig{
+			MaxAttempts: base.MaxAttempts,
+			BaseDelay:   base.BaseDelay,
+			MaxDelay:    base.MaxDelay,
+			Conditions:  []ChatRetryCondition{},
+			OnRetry:     base.OnRetry,
+			OnFinalFail: base.OnFinalFail,
+		}
+
+		retryContext := TestRetryContext{
+			ScenarioName: "ChatAudio",
+			ExpectedBehavior: map[string]interface{}{
+				"should_process_audio":     true,
+				"should_return_audio":      true,
+				"should_return_transcript": true,
+			},
+			TestMetadata: map[string]interface{}{
+				"provider": testConfig.Provider,
+				"model":    testConfig.ChatAudioModel,
+			},
+		}
+
+		// Every attempt issues a fresh request for text plus WAV audio output using the "alloy" voice.
+		sendRequest := func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+			params := &schemas.ChatParameters{
+				Modalities: []string{"text", "audio"},
+				Audio: &schemas.ChatAudioParameters{
+					Voice:  "alloy",
+					Format: "wav", // output format
+				},
+				MaxCompletionTokens: bifrost.Ptr(200),
+			}
+
+			resp, reqErr := client.ChatCompletionRequest(ctx, &schemas.BifrostChatRequest{
+				Provider:  testConfig.Provider,
+				Model:     testConfig.ChatAudioModel,
+				Input:     messages,
+				Params:    params,
+				Fallbacks: testConfig.Fallbacks,
+			})
+			switch {
+			case reqErr != nil:
+				return nil, reqErr
+			case resp != nil:
+				return resp, nil
+			}
+
+			// No error and no response: surface this as an explicit error instead of a nil result.
+			return nil, &schemas.BifrostError{
+				IsBifrostError: true,
+				Error: &schemas.ErrorField{
+					Message: "No chat response returned",
+				},
+			}
+		}
+
+		expectations := ModifyExpectationsForProvider(
+			GetExpectationsForScenario("ChatAudio", testConfig, map[string]interface{}{}),
+			testConfig.Provider,
+		)
+		chatResponse, chatError := WithChatTestRetry(t, chatRetryConfig, retryContext, expectations, "ChatAudio", sendRequest)
+
+		// Any failure up to the first choice is fatal: the checks below dereference these values.
+		switch {
+		case chatError != nil:
+			t.Fatalf("❌ Chat Completions API failed: %s", GetErrorMessage(chatError))
+		case chatResponse == nil:
+			t.Fatal("❌ Chat response should not be nil")
+		case len(chatResponse.Choices) == 0:
+			t.Fatal("❌ Chat response should have at least one choice")
+		}
+
+		// Drill down to the assistant audio payload, stopping at the first missing level.
+		first := chatResponse.Choices[0].ChatNonStreamResponseChoice
+		if first == nil {
+			t.Fatal("❌ Expected non-streaming response choice")
+		}
+		message := first.Message
+		if message == nil {
+			t.Fatal("❌ Message should not be nil")
+		}
+		assistant := message.ChatAssistantMessage
+		if assistant == nil {
+			t.Fatal("❌ Expected ChatAssistantMessage")
+		}
+		audio := assistant.Audio
+		if audio == nil {
+			t.Fatal("❌ Expected audio in response (choices[0].message.audio should be present)")
+		}
+
+		// Missing data or a missing transcript is a non-fatal failure, so both fields are always checked.
+		if audio.Data != "" {
+			t.Logf("✅ Audio data present in response (length: %d)", len(audio.Data))
+		} else {
+			t.Error("❌ Expected audio.data to be present in response")
+		}
+
+		if audio.Transcript != "" {
+			t.Logf("✅ Audio transcript present in response: %s", audio.Transcript)
+		} else {
+			t.Error("❌ Expected audio.transcript to be present in response")
+		}
+
+		// Text content is only logged when the response includes it.
+		if content := message.Content; content != nil && content.ContentStr != nil {
+			t.Logf("✅ Chat response content: %s", *content.ContentStr)
+		}
+
+		t.Logf("🎉 ChatAudio test passed!")
+	})
+}
+
+// RunChatAudioStreamTest runs the streaming chat audio scenario. It sends the bundled
+// sample clip to testConfig.ChatAudioModel through the streaming Chat Completions API,
+// accumulates the audio deltas of the first choice across all chunks, and fails the
+// subtest when the stream errors, yields no chunks, or carries neither audio data nor
+// a transcript. It logs and returns early when the scenario is disabled or no model is set.
+func RunChatAudioStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
+	if !testConfig.Scenarios.ChatAudio || testConfig.ChatAudioModel == "" {
+		t.Logf("Chat audio streaming not supported for provider %s", testConfig.Provider)
+		return
+	}
+
+	t.Run("ChatAudioStream", func(t *testing.T) {
+		if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
+			t.Parallel()
+		}
+
+		// Load sample audio file and encode as base64
+		encodedAudio, err := GetSampleAudioBase64()
+		if err != nil {
+			t.Fatalf("Failed to load sample audio file: %v", err)
+		}
+
+		// Create chat message with audio input
+		chatMessages := []schemas.ChatMessage{
+			CreateAudioChatMessage("Describe in detail the spoken audio input.", encodedAudio, "mp3"),
+		}
+
+		// Use retry framework for audio streaming requests
+		retryConfig := StreamingRetryConfig()
+		retryContext := TestRetryContext{
+			ScenarioName: "ChatAudioStream",
+			ExpectedBehavior: map[string]interface{}{
+				"should_process_audio":     true,
+				"should_return_audio":      true,
+				"should_return_transcript": true,
+			},
+			TestMetadata: map[string]interface{}{
+				"provider": testConfig.Provider,
+				"model":    testConfig.ChatAudioModel,
+			},
+		}
+
+		// Streaming request for text plus pcm16 audio output; the same request is reused on every retry
+		chatReq := &schemas.BifrostChatRequest{
+			Provider: testConfig.Provider,
+			Model:    testConfig.ChatAudioModel,
+			Input:    chatMessages,
+			Params: &schemas.ChatParameters{
+				Modalities: []string{"text", "audio"},
+				Audio: &schemas.ChatAudioParameters{
+					Voice:  "alloy",
+					Format: "pcm16", // output format
+				},
+			},
+			Fallbacks: testConfig.Fallbacks,
+		}
+
+		responseChannel, bifrostErr := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStream, *schemas.BifrostError) {
+			return client.ChatCompletionStreamRequest(ctx, chatReq)
+		})
+
+		// Report the error text via GetErrorMessage; %v on the error pointer prints a struct dump instead
+		if bifrostErr != nil {
+			t.Fatalf("Chat audio stream request failed: %s", GetErrorMessage(bifrostErr))
+		}
+		if responseChannel == nil {
+			t.Fatal("Response channel should not be nil")
+		}
+
+		// Accumulate stream chunks
+		var chunks []*schemas.BifrostStream
+		var audioData strings.Builder
+		var audioTranscript strings.Builder
+		var audioID string
+		var audioExpiresAt int
+		var lastUsage *schemas.BifrostLLMUsage
+
+		for chunk := range responseChannel {
+			chunks = append(chunks, chunk)
+
+			if chunk.BifrostError != nil && chunk.BifrostError.Error != nil {
+				t.Fatalf("Stream error: %s", GetErrorMessage(chunk.BifrostError))
+			}
+
+			if chunk.BifrostChatResponse != nil {
+				if len(chunk.BifrostChatResponse.Choices) > 0 {
+					choice := chunk.BifrostChatResponse.Choices[0]
+
+					// Only the first choice's streaming delta is inspected for audio
+					if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil {
+						delta := choice.ChatStreamResponseChoice.Delta
+						// Append data and transcript fragments in arrival order (empty fragments are
+						// no-ops) and keep the most recent non-empty ID and non-zero expiry.
+						if delta.Audio != nil {
+							audioData.WriteString(delta.Audio.Data)
+							audioTranscript.WriteString(delta.Audio.Transcript)
+							if delta.Audio.ID != "" {
+								audioID = delta.Audio.ID
+							}
+							if delta.Audio.ExpiresAt != 0 {
+								audioExpiresAt = delta.Audio.ExpiresAt
+							}
+						}
+					}
+				}
+
+				// Capture final usage
+				if chunk.BifrostChatResponse.Usage != nil {
+					lastUsage = chunk.BifrostChatResponse.Usage
+				}
+			}
+		}
+
+		// Validate that we received chunks
+		if len(chunks) == 0 {
+			t.Fatal("❌ Expected to receive stream chunks")
+		}
+
+		t.Logf("✅ Received %d stream chunks", len(chunks))
+
+		// Validate accumulated audio data (check overall, not per-chunk)
+		accumulatedAudioData := audioData.String()
+		accumulatedTranscript := audioTranscript.String()
+
+		// Check overall: at least one of audio data or transcript should be present
+		if accumulatedAudioData == "" && accumulatedTranscript == "" {
+			t.Fatal("❌ Expected overall audio data or transcript to be present in stream chunks")
+		}
+
+		if accumulatedAudioData != "" {
+			t.Logf("✅ Accumulated audio data (length: %d)", len(accumulatedAudioData))
+		} else {
+			t.Logf("⚠️ No accumulated audio data found")
+		}
+
+		if accumulatedTranscript != "" {
+			t.Logf("✅ Accumulated audio transcript: %s", accumulatedTranscript)
+		} else {
+			t.Logf("⚠️ No accumulated audio transcript found")
+		}
+
+		// Log audio metadata when present (informational only, nothing is asserted)
+		if audioID != "" {
+			t.Logf("✅ Audio ID: %s", audioID)
+		}
+		if audioExpiresAt != 0 {
+			t.Logf("✅ Audio expires at: %d", audioExpiresAt)
+		}
+
+		// Log usage from the last chunk that reported it (informational only)
+		if lastUsage != nil {
+			t.Logf("✅ Token usage - Prompt: %d, Completion: %d, Total: %d",
+				lastUsage.PromptTokens,
+				lastUsage.CompletionTokens,
+				lastUsage.TotalTokens)
+
+			// Check for audio tokens
+			if lastUsage.PromptTokensDetails != nil && lastUsage.PromptTokensDetails.AudioTokens > 0 {
+				t.Logf("✅ Input audio tokens: %d", lastUsage.PromptTokensDetails.AudioTokens)
+			}
+			if lastUsage.CompletionTokensDetails != nil && lastUsage.CompletionTokensDetails.AudioTokens > 0 {
+				t.Logf("✅ Output audio tokens: %d", lastUsage.CompletionTokensDetails.AudioTokens)
+			}
+		}
+
+		t.Logf("🎉 ChatAudioStream test passed!")
+	})
+}
diff --git a/core/internal/testutil/scenarios/media/sample.mp3 b/core/internal/testutil/scenarios/media/sample.mp3
new file mode 100644
index 000000000..c2ef055f1
Binary files /dev/null and b/core/internal/testutil/scenarios/media/sample.mp3 differ
diff --git a/core/internal/testutil/tests.go b/core/internal/testutil/tests.go
index f9a7401e5..aa0b3eaf0 100644
--- a/core/internal/testutil/tests.go
+++ b/core/internal/testutil/tests.go
@@ -64,6 +64,8 @@ func RunAllComprehensiveTests(t *testing.T, client *bifrost.Bifrost, ctx context
 		RunFileContentTest,
 		RunFileUnsupportedTest,
 		RunFileAndBatchIntegrationTest,
+		RunChatAudioTest,
+		RunChatAudioStreamTest,
 	}
 
 	// Execute all test scenarios
@@ -116,6 +118,8 @@ func printTestSummary(t *testing.T, testConfig ComprehensiveTestConfig) {
 		{"FileContent", testConfig.Scenarios.FileContent},
 		{"FileUnsupported", !testConfig.Scenarios.FileUpload && !testConfig.Scenarios.FileList && !testConfig.Scenarios.FileRetrieve && !testConfig.Scenarios.FileDelete && !testConfig.Scenarios.FileContent},
 		{"FileAndBatchIntegration", testConfig.Scenarios.FileBatchInput},
+		{"ChatAudio", testConfig.Scenarios.ChatAudio && testConfig.ChatAudioModel != ""},
+		{"ChatAudioStream", testConfig.Scenarios.ChatAudio && testConfig.ChatAudioModel != ""},
 	}
 
 	supported := 0
diff --git a/core/internal/testutil/transcription.go b/core/internal/testutil/transcription.go
index 0e5c1d18b..a577e7905 100644
--- a/core/internal/testutil/transcription.go
+++ b/core/internal/testutil/transcription.go
@@ -59,11 +59,21 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con
 					t.Parallel()
 				}
 
+				speechSynthesisProvider := testConfig.Provider
+				if testConfig.ExternalTTSProvider != "" {
+					speechSynthesisProvider = testConfig.ExternalTTSProvider
+				}
+
+				speechSynthesisModel := testConfig.SpeechSynthesisModel
+				if testConfig.ExternalTTSModel != "" {
+					speechSynthesisModel = testConfig.ExternalTTSModel
+				}
+
 				// Step 1: Generate TTS audio
-				voice := GetProviderVoice(testConfig.Provider, tc.voiceType)
+				voice := GetProviderVoice(speechSynthesisProvider, tc.voiceType)
 				ttsRequest := &schemas.BifrostSpeechRequest{
-					Provider: testConfig.Provider,
-					Model:    testConfig.SpeechSynthesisModel,
+					Provider: speechSynthesisProvider,
+					Model:    speechSynthesisModel,
 					Input: &schemas.SpeechInput{
 						Input: tc.text,
 					},
@@ -84,8 +94,8 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con
 						"should_generate_audio": true,
 					},
 					TestMetadata: map[string]interface{}{
-						"provider": testConfig.Provider,
-						"model":    testConfig.SpeechSynthesisModel,
+						"provider": speechSynthesisProvider,
+						"model":    speechSynthesisModel,
 						"format":   tc.format,
 					},
 				}
@@ -209,8 +219,18 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con
 						t.Parallel()
 					}
 
+					speechSynthesisProvider := testConfig.Provider
+					if testConfig.ExternalTTSProvider != "" {
+						speechSynthesisProvider = testConfig.ExternalTTSProvider
+					}
+
+					speechSynthesisModel := testConfig.SpeechSynthesisModel
+					if testConfig.ExternalTTSModel != "" {
+						speechSynthesisModel = testConfig.ExternalTTSModel
+					}
+
 					// Use the utility function to generate audio
-					audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, tc.text, "primary", "mp3")
+					audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, tc.text, "primary", "mp3")
 
 					// Test transcription
 					request := &schemas.BifrostTranscriptionRequest{
@@ -292,8 +312,18 @@ func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx con
 						t.Parallel()
 					}
 
+					speechSynthesisProvider := testConfig.Provider
+					if testConfig.ExternalTTSProvider != "" {
+						speechSynthesisProvider = testConfig.ExternalTTSProvider
+					}
+
+					speechSynthesisModel := testConfig.SpeechSynthesisModel
+					if testConfig.ExternalTTSModel != "" {
+						speechSynthesisModel = testConfig.ExternalTTSModel
+					}
+
 					// Generate fresh audio for each test to avoid race conditions and ensure validity
-					audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextBasic, "primary", "mp3")
+					audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3")
 
 					formatCopy := format
 					request := &schemas.BifrostTranscriptionRequest{
@@ -360,8 +390,18 @@ func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx con
 				t.Parallel()
 			}
 
+			speechSynthesisProvider := testConfig.Provider
+			if testConfig.ExternalTTSProvider != "" {
+				speechSynthesisProvider = testConfig.ExternalTTSProvider
+			}
+
+			speechSynthesisModel := testConfig.SpeechSynthesisModel
+			if testConfig.ExternalTTSModel != "" {
+				speechSynthesisModel = testConfig.ExternalTTSModel
+			}
+
 			// Generate audio for custom parameters test
-			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextMedium, "secondary", "mp3")
+			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextMedium, "secondary", "mp3")
 
 			// Test with custom parameters and temperature
 			request := &schemas.BifrostTranscriptionRequest{
@@ -432,8 +472,18 @@ func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx con
 						t.Parallel()
 					}
 
+					speechSynthesisProvider := testConfig.Provider
+					if testConfig.ExternalTTSProvider != "" {
+						speechSynthesisProvider = testConfig.ExternalTTSProvider
+					}
+
+					speechSynthesisModel := testConfig.SpeechSynthesisModel
+					if testConfig.ExternalTTSModel != "" {
+						speechSynthesisModel = testConfig.ExternalTTSModel
+					}
+
 					// Generate fresh audio for each test to avoid race conditions and ensure validity
-					audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextBasic, "primary", "mp3")
+					audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3")
 
 					langCopy := lang
 					request := &schemas.BifrostTranscriptionRequest{
diff --git a/core/internal/testutil/transcription_stream.go b/core/internal/testutil/transcription_stream.go
index 25d4e31dd..1f27dcf1f 100644
--- a/core/internal/testutil/transcription_stream.go
+++ b/core/internal/testutil/transcription_stream.go
@@ -9,7 +9,6 @@ import (
 	"testing"
 	"time"
 
-
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -63,11 +62,21 @@ func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx conte
 					t.Parallel()
 				}
 
+				speechSynthesisProvider := testConfig.Provider
+				if testConfig.ExternalTTSProvider != "" {
+					speechSynthesisProvider = testConfig.ExternalTTSProvider
+				}
+
+				speechSynthesisModel := testConfig.SpeechSynthesisModel
+				if testConfig.ExternalTTSModel != "" {
+					speechSynthesisModel = testConfig.ExternalTTSModel
+				}
+
 				// Step 1: Generate TTS audio
-				voice := GetProviderVoice(testConfig.Provider, tc.voiceType)
+				voice := GetProviderVoice(speechSynthesisProvider, tc.voiceType)
 				ttsRequest := &schemas.BifrostSpeechRequest{
-					Provider: testConfig.Provider,
-					Model:    testConfig.SpeechSynthesisModel,
+					Provider: speechSynthesisProvider,
+					Model:    speechSynthesisModel,
 					Input: &schemas.SpeechInput{
 						Input: tc.text,
 					},
@@ -88,8 +97,8 @@ func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx conte
 						"should_generate_audio": true,
 					},
 					TestMetadata: map[string]interface{}{
-						"provider": testConfig.Provider,
-						"model":    testConfig.SpeechSynthesisModel,
+						"provider": speechSynthesisProvider,
+						"model":    speechSynthesisModel,
 					},
 				}
 				ttsExpectations := SpeechExpectations(100)
@@ -335,8 +344,18 @@ func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, c
 				t.Parallel()
 			}
 
+			speechSynthesisProvider := testConfig.Provider
+			if testConfig.ExternalTTSProvider != "" {
+				speechSynthesisProvider = testConfig.ExternalTTSProvider
+			}
+
+			speechSynthesisModel := testConfig.SpeechSynthesisModel
+			if testConfig.ExternalTTSModel != "" {
+				speechSynthesisModel = testConfig.ExternalTTSModel
+			}
+
 			// Generate audio for streaming test
-			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextBasic, "primary", "mp3")
+			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3")
 
 			// Test streaming with JSON format
 			request := &schemas.BifrostTranscriptionRequest{
@@ -420,8 +439,18 @@ func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, c
 				t.Parallel()
 			}
 
+			speechSynthesisProvider := testConfig.Provider
+			if testConfig.ExternalTTSProvider != "" {
+				speechSynthesisProvider = testConfig.ExternalTTSProvider
+			}
+
+			speechSynthesisModel := testConfig.SpeechSynthesisModel
+			if testConfig.ExternalTTSModel != "" {
+				speechSynthesisModel = testConfig.ExternalTTSModel
+			}
+
 			// Generate audio for language streaming tests
-			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextBasic, "primary", "mp3")
+			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextBasic, "primary", "mp3")
 			// Test streaming with different language hints (only English for now)
 			languages := []string{"en"}
 
@@ -509,8 +538,18 @@ func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, c
 				t.Parallel()
 			}
 
+			speechSynthesisProvider := testConfig.Provider
+			if testConfig.ExternalTTSProvider != "" {
+				speechSynthesisProvider = testConfig.ExternalTTSProvider
+			}
+
+			speechSynthesisModel := testConfig.SpeechSynthesisModel
+			if testConfig.ExternalTTSModel != "" {
+				speechSynthesisModel = testConfig.ExternalTTSModel
+			}
+
 			// Generate audio for custom prompt streaming test
-			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, testConfig.Provider, testConfig.SpeechSynthesisModel, TTSTestTextTechnical, "tertiary", "mp3")
+			audioData, _ := GenerateTTSAudioForTest(ctx, t, client, speechSynthesisProvider, speechSynthesisModel, TTSTestTextTechnical, "tertiary", "mp3")
 
 			// Test streaming with custom prompt for context
 			request := &schemas.BifrostTranscriptionRequest{
diff --git a/core/internal/testutil/utils.go b/core/internal/testutil/utils.go
index 12b8109aa..5f37dc6be 100644
--- a/core/internal/testutil/utils.go
+++ b/core/internal/testutil/utils.go
@@ -2,6 +2,7 @@ package testutil
 
 import (
 	"context"
+	"encoding/base64"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -215,6 +216,22 @@ func GetLionBase64Image() (string, error) {
 	return "data:image/png;base64," + string(data), nil
 }
 
+// GetSampleAudioBase64 reads scenarios/media/sample.mp3, located relative to this source file, and returns its contents as a standard base64 string (no data-URI prefix). It returns an error if the source path cannot be determined or the file cannot be read.
+func GetSampleAudioBase64() (string, error) {
+	_, filename, _, ok := runtime.Caller(0) // file path of this function's source file, used to anchor the media path
+	if !ok {
+		return "", fmt.Errorf("failed to get current file path")
+	}
+	dir := filepath.Dir(filename)
+	filePath := filepath.Join(dir, "scenarios", "media", "sample.mp3")
+
+	data, err := os.ReadFile(filePath)
+	if err != nil {
+		return "", err // read error is passed through unwrapped
+	}
+	return base64.StdEncoding.EncodeToString(data), nil
+}
+
 // CreateSpeechRequest creates a basic speech input for testing
 func CreateSpeechRequest(text, voice, format string) *schemas.BifrostSpeechRequest {
 	return &schemas.BifrostSpeechRequest{
@@ -292,6 +309,25 @@ func CreateImageResponsesMessage(text, imageURL string) schemas.ResponsesMessage
 	}
 }
 
+func CreateAudioChatMessage(text, audioData string, audioFormat string) schemas.ChatMessage { // CreateAudioChatMessage builds a user message with a text block followed by an input-audio block.
+	format := bifrost.Ptr(audioFormat) // Format is passed as a pointer; Ptr presumably returns *string — confirm
+	return schemas.ChatMessage{
+		Role: schemas.ChatMessageRoleUser,
+		Content: &schemas.ChatMessageContent{
+			ContentBlocks: []schemas.ChatContentBlock{
+				{Type: schemas.ChatContentBlockTypeText, Text: bifrost.Ptr(text)},
+				{
+					Type: schemas.ChatContentBlockTypeInputAudio,
+					InputAudio: &schemas.ChatInputAudio{
+						Data:   audioData, // forwarded as-is; presumably base64 audio (e.g. from GetSampleAudioBase64) — confirm against callers
+						Format: format,
+					},
+				},
+			},
+		},
+	}
+}
+
 func CreateToolChatMessage(content string, toolCallID string) schemas.ChatMessage {
 	return schemas.ChatMessage{
 		Role: schemas.ChatMessageRoleTool,
@@ -627,7 +663,7 @@ func GetErrorMessage(err *schemas.BifrostError) string {
 	}
 
 	errorCode := ""
-	if err.Error != nil &&  err.Error.Code != nil && *err.Error.Code != "" {
+	if err.Error != nil && err.Error.Code != nil && *err.Error.Code != "" {
 		errorCode = *err.Error.Code
 	}
 
diff --git a/core/internal/testutil/validation_presets.go b/core/internal/testutil/validation_presets.go
index c096be72f..cd45eb331 100644
--- a/core/internal/testutil/validation_presets.go
+++ b/core/internal/testutil/validation_presets.go
@@ -3,7 +3,6 @@ package testutil
 import (
 	"regexp"
 
-
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
@@ -200,6 +199,21 @@ func ReasoningExpectations() ResponseExpectations {
 	}
 }
 
+// ChatAudioExpectations returns the ResponseExpectations for chat audio scenarios: one choice, with usage stats, timestamps, model and latency required, and text content not required.
+func ChatAudioExpectations() ResponseExpectations {
+	return ResponseExpectations{
+		ShouldHaveContent:    false, // Chat audio responses may have audio/transcript but not text content
+		ExpectedChoiceCount:  1,     // Should have one choice with audio data
+		ShouldHaveUsageStats: true,
+		ShouldHaveTimestamps: true,
+		ShouldHaveModel:      true,
+		ShouldHaveLatency:    true, // Global expectation: latency should always be present
+		ProviderSpecific: map[string]interface{}{
+			"response_type": "chat_audio", // marker for this preset; NOTE(review): how validators consume it is not visible here
+		},
+	}
+}
+
 // =============================================================================
 // SCENARIO-SPECIFIC EXPECTATION BUILDERS
 // =============================================================================
@@ -281,6 +295,9 @@ func GetExpectationsForScenario(scenarioName string, testConfig ComprehensiveTes
 		expectations := ReasoningExpectations()
 		return expectations
 
+	case "ChatAudio":
+		return ChatAudioExpectations()
+
 	case "ProviderSpecific":
 		expectations := BasicChatExpectations()
 		expectations.ShouldContainKeywords = []string{"unique", "specific", "capability"}
diff --git a/core/providers/azure/azure_test.go b/core/providers/azure/azure_test.go
index e51c503ab..a973879ed 100644
--- a/core/providers/azure/azure_test.go
+++ b/core/providers/azure/azure_test.go
@@ -24,15 +24,18 @@ func TestAzure(t *testing.T) {
 	defer cancel()
 
 	testConfig := testutil.ComprehensiveTestConfig{
-		Provider:    schemas.Azure,
-		ChatModel:   "gpt-4o-backup",
-		VisionModel: "gpt-4o",
+		Provider:       schemas.Azure,
+		ChatModel:      "gpt-4o-backup",
+		VisionModel:    "gpt-4o",
+		ChatAudioModel: "gpt-4o-mini-audio-preview",
 		Fallbacks: []schemas.Fallback{
 			{Provider: schemas.Azure, Model: "gpt-4o-backup"},
 		},
-		TextModel:      "", // Azure doesn't support text completion in newer models
-		EmbeddingModel: "text-embedding-ada-002",
-		ReasoningModel: "claude-opus-4-5",
+		TextModel:            "", // Azure doesn't support text completion in newer models
+		EmbeddingModel:       "text-embedding-ada-002",
+		ReasoningModel:       "claude-opus-4-5",
+		SpeechSynthesisModel: "gpt-4o-mini-tts",
+		TranscriptionModel:   "whisper",
 		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
@@ -50,6 +53,11 @@ func TestAzure(t *testing.T) {
 			Embedding:             true,
 			ListModels:            true,
 			Reasoning:             true,
+			ChatAudio:             true,
+			Transcription:         true,
+			TranscriptionStream:   false, // Not properly supported yet by Azure
+			SpeechSynthesis:       true,
+			SpeechSynthesisStream: true,
 		},
 	}
 
diff --git a/core/providers/mistral/mistral.go b/core/providers/mistral/mistral.go
index d5c221e4a..0be35657d 100644
--- a/core/providers/mistral/mistral.go
+++ b/core/providers/mistral/mistral.go
@@ -467,6 +467,11 @@ func (provider *MistralProvider) TranscriptionStream(ctx context.Context, postHo
 				if currentEvent != "" && currentData != "" {
 					chunkIndex++
 					provider.processTranscriptionStreamEvent(ctx, postHookRunner, currentEvent, currentData, request.Model, providerName, chunkIndex, startTime, &lastChunkTime, responseChan)
+					// Break the loop if this was a done event (check both possible event types)
+					eventType := MistralTranscriptionStreamEventType(currentEvent)
+					if eventType == MistralTranscriptionStreamEventDone || currentEvent == "transcript.text.done" {
+						break
+					}
 				}
 				// Reset for next event
 				currentEvent = ""
@@ -486,6 +491,7 @@ func (provider *MistralProvider) TranscriptionStream(ctx context.Context, postHo
 		if currentEvent != "" && currentData != "" {
 			chunkIndex++
 			provider.processTranscriptionStreamEvent(ctx, postHookRunner, currentEvent, currentData, request.Model, providerName, chunkIndex, startTime, &lastChunkTime, responseChan)
+			// Note: No need to break here as scanner.Scan() has already finished
 		}
 
 		// Handle scanner errors
@@ -564,10 +570,12 @@ func (provider *MistralProvider) processTranscriptionStreamEvent(
 		response.ExtraFields.RawResponse = jsonData
 	}
 
-	// Check for done event
-	if MistralTranscriptionStreamEventType(eventType) == MistralTranscriptionStreamEventDone {
+	// Check for done event (handle both "transcription.done" and "transcript.text.done")
+	if MistralTranscriptionStreamEventType(eventType) == MistralTranscriptionStreamEventDone || eventType == "transcript.text.done" {
 		response.ExtraFields.Latency = time.Since(startTime).Milliseconds()
 		ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true)
+		// Ensure response type is set to Done
+		response.Type = schemas.TranscriptionStreamResponseTypeDone
 	}
 
 	providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, nil, response), responseChan)
diff --git a/core/providers/mistral/mistral_test.go b/core/providers/mistral/mistral_test.go
index 136c0c5bb..5ab9c8ade 100644
--- a/core/providers/mistral/mistral_test.go
+++ b/core/providers/mistral/mistral_test.go
@@ -28,9 +28,11 @@ func TestMistral(t *testing.T) {
 		Fallbacks: []schemas.Fallback{
 			{Provider: schemas.Mistral, Model: "mistral-small-2503"},
 		},
-		VisionModel:        "pixtral-12b-latest",
-		EmbeddingModel:     "codestral-embed",
-		TranscriptionModel: "voxtral-mini-latest", // Mistral's audio transcription model
+		VisionModel:         "pixtral-12b-latest",
+		EmbeddingModel:      "codestral-embed",
+		TranscriptionModel:  "voxtral-mini-latest", // Mistral's audio transcription model
+		ExternalTTSProvider: schemas.OpenAI,
+		ExternalTTSModel:    "gpt-4o-mini-tts",
 		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
@@ -47,8 +49,8 @@ func TestMistral(t *testing.T) {
 			CompleteEnd2End:       true,
 			Embedding:             true,
 			Transcription:         true,
-			TranscriptionStream:   true, // Streaming transcription supported
-			ListModels:            false,
+			TranscriptionStream:   true,
+			ListModels:            true,
 			Reasoning:             false, // Not supported right now because we are not using native mistral converters
 		},
 	}
diff --git a/core/providers/mistral/transcription_test.go b/core/providers/mistral/transcription_test.go
index cc66da0f6..d1a2dbb68 100644
--- a/core/providers/mistral/transcription_test.go
+++ b/core/providers/mistral/transcription_test.go
@@ -980,6 +980,7 @@ func TestCreateMistralTranscriptionStreamMultipartBody(t *testing.T) {
 				Model:    "voxtral-mini-latest",
 				File:     []byte{0x01, 0x02, 0x03},
 				Language: schemas.Ptr("en"),
+				Stream:   schemas.Ptr(true),
 			},
 			expectedFields: map[string]string{
 				"stream":   "true",
@@ -996,6 +997,7 @@ func TestCreateMistralTranscriptionStreamMultipartBody(t *testing.T) {
 				Prompt:                 schemas.Ptr("Test prompt"),
 				ResponseFormat:         schemas.Ptr("verbose_json"),
 				Temperature:            schemas.Ptr(0.5),
+				Stream:                 schemas.Ptr(true),
 				TimestampGranularities: []string{"word", "segment"},
 			},
 			expectedFields: map[string]string{
diff --git a/core/providers/openai/openai.go b/core/providers/openai/openai.go
index 31628e8cc..6085080da 100644
--- a/core/providers/openai/openai.go
+++ b/core/providers/openai/openai.go
@@ -1048,6 +1048,7 @@ func HandleOpenAIChatCompletionStreaming(
 				if choice.ChatStreamResponseChoice != nil &&
 					choice.ChatStreamResponseChoice.Delta != nil &&
 					(choice.ChatStreamResponseChoice.Delta.Content != nil ||
+						choice.ChatStreamResponseChoice.Delta.Audio != nil ||
 						len(choice.ChatStreamResponseChoice.Delta.ToolCalls) > 0) {
 					chunkIndex++
 
diff --git a/core/providers/openai/openai_test.go b/core/providers/openai/openai_test.go
index 19077b809..0bedc6747 100644
--- a/core/providers/openai/openai_test.go
+++ b/core/providers/openai/openai_test.go
@@ -38,6 +38,7 @@ func TestOpenAI(t *testing.T) {
 		},
 		SpeechSynthesisModel: "gpt-4o-mini-tts",
 		ReasoningModel:       "o1",
+		ChatAudioModel:       "gpt-4o-mini-audio-preview",
 		Scenarios: testutil.TestScenarios{
 			TextCompletion:        true,
 			TextCompletionStream:  true,
@@ -71,6 +72,7 @@ func TestOpenAI(t *testing.T) {
 			FileDelete:            true,
 			FileContent:           true,
 			FileBatchInput:        true,
+			ChatAudio:             true,
 		},
 	}
 
diff --git a/core/schemas/chatcompletions.go b/core/schemas/chatcompletions.go
index 9ec7c6193..c305cc059 100644
--- a/core/schemas/chatcompletions.go
+++ b/core/schemas/chatcompletions.go
@@ -154,30 +154,31 @@ func (cr *BifrostChatResponse) ToTextCompletionResponse() *BifrostTextCompletion
 
 // ChatParameters represents the parameters for a chat completion.
 type ChatParameters struct {
-	FrequencyPenalty    *float64            `json:"frequency_penalty,omitempty"`     // Penalizes frequent tokens
-	LogitBias           *map[string]float64 `json:"logit_bias,omitempty"`            // Bias for logit values
-	LogProbs            *bool               `json:"logprobs,omitempty"`              // Number of logprobs to return
-	MaxCompletionTokens *int                `json:"max_completion_tokens,omitempty"` // Maximum number of tokens to generate
-	Metadata            *map[string]any     `json:"metadata,omitempty"`              // Metadata to be returned with the response
-	Modalities          []string            `json:"modalities,omitempty"`            // Modalities to be returned with the response
-	ParallelToolCalls   *bool               `json:"parallel_tool_calls,omitempty"`
-	PresencePenalty     *float64            `json:"presence_penalty,omitempty"`  // Penalizes repeated tokens
-	PromptCacheKey      *string             `json:"prompt_cache_key,omitempty"`  // Prompt cache key
-	Reasoning           *ChatReasoning      `json:"reasoning,omitempty"`         // Reasoning parameters
-	ResponseFormat      *interface{}        `json:"response_format,omitempty"`   // Format for the response
-	SafetyIdentifier    *string             `json:"safety_identifier,omitempty"` // Safety identifier
-	Seed                *int                `json:"seed,omitempty"`
-	ServiceTier         *string             `json:"service_tier,omitempty"`
-	StreamOptions       *ChatStreamOptions  `json:"stream_options,omitempty"`
-	Stop                []string            `json:"stop,omitempty"`
-	Store               *bool               `json:"store,omitempty"`
-	Temperature         *float64            `json:"temperature,omitempty"`
-	TopLogProbs         *int                `json:"top_logprobs,omitempty"`
-	TopP                *float64            `json:"top_p,omitempty"`       // Controls diversity via nucleus sampling
-	ToolChoice          *ChatToolChoice     `json:"tool_choice,omitempty"` // Whether to call a tool
-	Tools               []ChatTool          `json:"tools,omitempty"`       // Tools to use
-	User                *string             `json:"user,omitempty"`        // User identifier for tracking
-	Verbosity           *string             `json:"verbosity,omitempty"`   // "low" | "medium" | "high"
+	Audio               *ChatAudioParameters `json:"audio,omitempty"`                 // Audio parameters
+	FrequencyPenalty    *float64             `json:"frequency_penalty,omitempty"`     // Penalizes frequent tokens
+	LogitBias           *map[string]float64  `json:"logit_bias,omitempty"`            // Bias for logit values
+	LogProbs            *bool                `json:"logprobs,omitempty"`              // Whether to return logprobs
+	MaxCompletionTokens *int                 `json:"max_completion_tokens,omitempty"` // Maximum number of tokens to generate
+	Metadata            *map[string]any      `json:"metadata,omitempty"`              // Metadata to be returned with the response
+	Modalities          []string             `json:"modalities,omitempty"`            // Modalities to be returned with the response
+	ParallelToolCalls   *bool                `json:"parallel_tool_calls,omitempty"`
+	PresencePenalty     *float64             `json:"presence_penalty,omitempty"`  // Penalizes repeated tokens
+	PromptCacheKey      *string              `json:"prompt_cache_key,omitempty"`  // Prompt cache key
+	Reasoning           *ChatReasoning       `json:"reasoning,omitempty"`         // Reasoning parameters
+	ResponseFormat      *interface{}         `json:"response_format,omitempty"`   // Format for the response
+	SafetyIdentifier    *string              `json:"safety_identifier,omitempty"` // Safety identifier
+	Seed                *int                 `json:"seed,omitempty"`
+	ServiceTier         *string              `json:"service_tier,omitempty"`
+	StreamOptions       *ChatStreamOptions   `json:"stream_options,omitempty"`
+	Stop                []string             `json:"stop,omitempty"`
+	Store               *bool                `json:"store,omitempty"`
+	Temperature         *float64             `json:"temperature,omitempty"`
+	TopLogProbs         *int                 `json:"top_logprobs,omitempty"`
+	TopP                *float64             `json:"top_p,omitempty"`       // Controls diversity via nucleus sampling
+	ToolChoice          *ChatToolChoice      `json:"tool_choice,omitempty"` // Whether to call a tool
+	Tools               []ChatTool           `json:"tools,omitempty"`       // Tools to use
+	User                *string              `json:"user,omitempty"`        // User identifier for tracking
+	Verbosity           *string              `json:"verbosity,omitempty"`   // "low" | "medium" | "high"
 
 	// Dynamic parameters that can be provider-specific, they are directly
 	// added to the request as is.
@@ -220,6 +221,12 @@ func (cp *ChatParameters) UnmarshalJSON(data []byte) error {
 	return nil
 }
 
+// ChatAudioParameters holds the "audio" options of a chat request (ChatParameters.Audio). Originally noted as only supported by OpenAI models that support audio input; NOTE(review): format/voice read like audio-output settings — confirm wording.
+type ChatAudioParameters struct {
+	Format string `json:"format,omitempty"` // Format for the audio completion; omitted from JSON when empty
+	Voice  string `json:"voice,omitempty"`  // Voice to use for the audio completion; omitted from JSON when empty
+}
+
 // Not in OpenAI's spec, but needed to support extra parameters for reasoning.
 type ChatReasoning struct {
 	Effort    *string `json:"effort,omitempty"`     // "none" |  "minimal" | "low" | "medium" | "high" (any value other than "none" will enable reasoning)
@@ -515,7 +522,7 @@ func (cm *ChatMessage) UnmarshalJSON(data []byte) error {
 	// Only set if any field is populated
 	if assistantMsg.Refusal != nil || assistantMsg.Reasoning != nil ||
 		len(assistantMsg.ReasoningDetails) > 0 || len(assistantMsg.Annotations) > 0 ||
-		len(assistantMsg.ToolCalls) > 0 {
+		len(assistantMsg.ToolCalls) > 0 || assistantMsg.Audio != nil {
 		cm.ChatAssistantMessage = &assistantMsg
 	}
 
@@ -641,6 +648,7 @@ type ChatToolMessage struct {
 // ChatAssistantMessage represents a message in a chat conversation.
 type ChatAssistantMessage struct {
 	Refusal          *string                          `json:"refusal,omitempty"`
+	Audio            *ChatAudioMessageAudio           `json:"audio,omitempty"`
 	Reasoning        *string                          `json:"reasoning,omitempty"`
 	ReasoningDetails []ChatReasoningDetails           `json:"reasoning_details,omitempty"`
 	Annotations      []ChatAssistantMessageAnnotation `json:"annotations,omitempty"`
@@ -713,6 +721,14 @@ type ChatAssistantMessageToolCallFunction struct {
 	Arguments string  `json:"arguments"` // stringified json as retured by OpenAI, might not be a valid JSON always
 }
 
+// ChatAudioMessageAudio represents audio data in a message; it is the type of ChatAssistantMessage.Audio and ChatStreamResponseChoiceDelta.Audio.
+type ChatAudioMessageAudio struct {
+	ID         string `json:"id"`         // no omitempty on any field: zero values are still written when marshalled
+	Data       string `json:"data"`       // presumably base64-encoded audio — TODO confirm
+	ExpiresAt  int    `json:"expires_at"` // presumably a Unix timestamp — TODO confirm
+	Transcript string `json:"transcript"` // presumably the text transcript of the audio — TODO confirm
+}
+
 // BifrostResponseChoice represents a choice in the completion result.
 // This struct can represent either a streaming or non-streaming response choice.
 // IMPORTANT: Only one of TextCompletionResponseChoice, NonStreamResponseChoice or StreamResponseChoice
@@ -773,6 +789,7 @@ type ChatStreamResponseChoiceDelta struct {
 	Role             *string                        `json:"role,omitempty"`      // Only in the first chunk
 	Content          *string                        `json:"content,omitempty"`   // May be empty string or null
 	Refusal          *string                        `json:"refusal,omitempty"`   // Refusal content if any
+	Audio            *ChatAudioMessageAudio         `json:"audio,omitempty"`     // Audio data if any
 	Reasoning        *string                        `json:"reasoning,omitempty"` // May be empty string or null
 	ReasoningDetails []ChatReasoningDetails         `json:"reasoning_details,omitempty"`
 	ToolCalls        []ChatAssistantMessageToolCall `json:"tool_calls,omitempty"` // If tool calls used (supports incremental updates)
@@ -835,7 +852,9 @@ type BifrostLLMUsage struct {
 }
 
 type ChatPromptTokensDetails struct {
+	TextTokens  int `json:"text_tokens,omitempty"`
 	AudioTokens int `json:"audio_tokens,omitempty"`
+	ImageTokens int `json:"image_tokens,omitempty"`
 
 	// For Providers which follow OpenAI's spec, CachedTokens means the number of input tokens read from the cache+input tokens used to create the cache entry. (because they do not differentiate between cache creation and cache read tokens)
 	// For Providers which do not follow OpenAI's spec, CachedTokens means only the number of input tokens read from the cache.
@@ -843,6 +862,7 @@ type ChatPromptTokensDetails struct {
 }
 
 type ChatCompletionTokensDetails struct {
+	TextTokens               int  `json:"text_tokens,omitempty"`
 	AcceptedPredictionTokens int  `json:"accepted_prediction_tokens,omitempty"`
 	AudioTokens              int  `json:"audio_tokens,omitempty"`
 	CitationTokens           *int `json:"citation_tokens,omitempty"`
diff --git a/core/schemas/mux.go b/core/schemas/mux.go
index d4b10da53..b43ba2f85 100644
--- a/core/schemas/mux.go
+++ b/core/schemas/mux.go
@@ -651,12 +651,15 @@ func (cu *BifrostLLMUsage) ToResponsesResponseUsage() *ResponsesResponseUsage {
 
 	if cu.PromptTokensDetails != nil {
 		usage.InputTokensDetails = &ResponsesResponseInputTokens{
+			TextTokens:   cu.PromptTokensDetails.TextTokens,
 			AudioTokens:  cu.PromptTokensDetails.AudioTokens,
+			ImageTokens:  cu.PromptTokensDetails.ImageTokens,
 			CachedTokens: cu.PromptTokensDetails.CachedTokens,
 		}
 	}
 	if cu.CompletionTokensDetails != nil {
 		usage.OutputTokensDetails = &ResponsesResponseOutputTokens{
+			TextTokens:               cu.CompletionTokensDetails.TextTokens,
 			AcceptedPredictionTokens: cu.CompletionTokensDetails.AcceptedPredictionTokens,
 			AudioTokens:              cu.CompletionTokensDetails.AudioTokens,
 			ReasoningTokens:          cu.CompletionTokensDetails.ReasoningTokens,
@@ -684,12 +687,15 @@ func (ru *ResponsesResponseUsage) ToBifrostLLMUsage() *BifrostLLMUsage {
 
 	if ru.InputTokensDetails != nil {
 		usage.PromptTokensDetails = &ChatPromptTokensDetails{
+			TextTokens:   ru.InputTokensDetails.TextTokens,
 			AudioTokens:  ru.InputTokensDetails.AudioTokens,
+			ImageTokens:  ru.InputTokensDetails.ImageTokens,
 			CachedTokens: ru.InputTokensDetails.CachedTokens,
 		}
 	}
 	if ru.OutputTokensDetails != nil {
 		usage.CompletionTokensDetails = &ChatCompletionTokensDetails{
+			TextTokens:               ru.OutputTokensDetails.TextTokens,
 			AcceptedPredictionTokens: ru.OutputTokensDetails.AcceptedPredictionTokens,
 			AudioTokens:              ru.OutputTokensDetails.AudioTokens,
 			ReasoningTokens:          ru.OutputTokensDetails.ReasoningTokens,
diff --git a/core/schemas/responses.go b/core/schemas/responses.go
index cf0d83582..4e1de267c 100644
--- a/core/schemas/responses.go
+++ b/core/schemas/responses.go
@@ -265,14 +265,17 @@ type ResponsesResponseUsage struct {
 }
 
 type ResponsesResponseInputTokens struct {
-	AudioTokens int `json:"audio_tokens"` // Tokens for audio input
+	TextTokens  int `json:"text_tokens,omitempty"`  // Tokens for text input
+	AudioTokens int `json:"audio_tokens,omitempty"` // Tokens for audio input
+	ImageTokens int `json:"image_tokens,omitempty"` // Tokens for image input
 
 	// For Providers which follow OpenAI's spec, CachedTokens means the number of input tokens read from the cache+input tokens used to create the cache entry. (because they do not differentiate between cache creation and cache read tokens)
 	// For Providers which do not follow OpenAI's spec, CachedTokens means only the number of input tokens read from the cache.
-	CachedTokens int `json:"cached_tokens"`
+	CachedTokens int `json:"cached_tokens,omitempty"`
 }
 
 type ResponsesResponseOutputTokens struct {
+	TextTokens               int  `json:"text_tokens,omitempty"`
 	AcceptedPredictionTokens int  `json:"accepted_prediction_tokens,omitempty"`
 	AudioTokens              int  `json:"audio_tokens,omitempty"`
 	ReasoningTokens          int  `json:"reasoning_tokens"` // Required for few OpenAI models
diff --git a/core/version b/core/version
index adf1ebc44..84da4213b 100644
--- a/core/version
+++ b/core/version
@@ -1 +1 @@
-1.2.38
\ No newline at end of file
+1.2.39
\ No newline at end of file
diff --git a/docs/contributing/setting-up-repo.mdx b/docs/contributing/setting-up-repo.mdx
index 57f42b2df..b8b751d88 100644
--- a/docs/contributing/setting-up-repo.mdx
+++ b/docs/contributing/setting-up-repo.mdx
@@ -9,7 +9,7 @@ This guide walks you through setting up the Bifrost repository for local develop
 ## Prerequisites
 
 Before setting up the repository, ensure you have the following tools installed:
-- [Go](https://go.dev/doc/install) (1.24.3)
+- [Go](https://go.dev/doc/install) (1.25.5)
 - [Node.js](https://nodejs.org/en/download) (>= 18.0.0) and npm
 - [Make](/deployment-guides/how-to/install-make)
 - [Docker](https://www.docker.com) (optional, for containerized development)
diff --git a/docs/plugins/building-dynamic-binary.mdx b/docs/plugins/building-dynamic-binary.mdx
index fd0d895ad..a67151630 100644
--- a/docs/plugins/building-dynamic-binary.mdx
+++ b/docs/plugins/building-dynamic-binary.mdx
@@ -114,7 +114,7 @@ RUN npx next build
 RUN node scripts/fix-paths.js
 
 # --- Go Build Stage: Compile the Go binary ---
-FROM golang:1.24.3-alpine3.22 AS builder
+FROM golang:1.25.5-alpine3.22 AS builder
 WORKDIR /app
 
 # Install dependencies including gcc for CGO and sqlite
@@ -230,7 +230,7 @@ RUN npx next build
 RUN node scripts/fix-paths.js
 
 # --- Go Build Stage: Compile the Go binary ---
-FROM golang:1.24.3-bookworm AS builder
+FROM golang:1.25.5-bookworm AS builder
 WORKDIR /app
 
 # Install dependencies including gcc for CGO and sqlite
@@ -380,12 +380,12 @@ error while loading shared libraries: libc.musl-x86_64.so.1: cannot open shared
 
 | Target Deployment | Build With | Dockerfile Base |
 |-------------------|------------|-----------------|
-| Alpine containers | musl | `golang:1.24.3-alpine3.22` |
-| Debian/Ubuntu containers | glibc | `golang:1.24.3-bookworm` |
-| Ubuntu/Debian servers | glibc | `golang:1.24.3-bookworm` |
+| Alpine containers | musl | `golang:1.25.5-alpine3.22` |
+| Debian/Ubuntu containers | glibc | `golang:1.25.5-bookworm` |
+| Ubuntu/Debian servers | glibc | `golang:1.25.5-bookworm` |
 | RHEL/CentOS servers | glibc | Native build or glibc container |
-| Kubernetes (Alpine) | musl | `golang:1.24.3-alpine3.22` |
-| Kubernetes (Debian) | glibc | `golang:1.24.3-bookworm` |
+| Kubernetes (Alpine) | musl | `golang:1.25.5-alpine3.22` |
+| Kubernetes (Debian) | glibc | `golang:1.25.5-bookworm` |
 
 **Simple rule:** Build with the same base OS family as your deployment target.
 
@@ -398,7 +398,7 @@ Plugins **must** be built with the **exact same environment** as your Bifrost bi
 docker run --rm \
   -v "$PWD:/work" \
   -w /work \
-  golang:1.24.3-alpine3.22 \
+  golang:1.25.5-alpine3.22 \
   sh -c "apk add --no-cache gcc musl-dev && \
          go build -buildmode=plugin -o myplugin.so main.go"
 
@@ -406,7 +406,7 @@ docker run --rm \
 docker run --rm \
   -v "$PWD:/work" \
   -w /work \
-  golang:1.24.3-bookworm \
+  golang:1.25.5-bookworm \
   sh -c "apt-get update && apt-get install -y gcc && \
          go build -buildmode=plugin -o myplugin.so main.go"
 ```
@@ -451,14 +451,14 @@ Test that your plugin loads successfully:
 
 ### Go Version Requirement
 
-Bifrost is built with **Go 1.24.3**. Your plugin **must** be compiled with the exact same Go version to ensure compatibility.
+Bifrost is built with **Go 1.25.5**. Your plugin **must** be compiled with the exact same Go version to ensure compatibility.
 
 ```bash
 # Check your Go version
 go version
-# Should output: go version go1.24.3 ...
+# Should output: go version go1.25.5 ...
 
-# If you need to install Go 1.24.3
+# If you need to install Go 1.25.5
 # Visit: https://go.dev/dl/
 ```
 
@@ -531,12 +531,12 @@ When creating a plugin, your `go.mod` should match Bifrost's Go version:
 ```go
 module github.com/example/my-plugin
 
-go 1.24.3
+go 1.25.5
 
 require (
-    github.com/maximhq/bifrost/core v1.2.26
+    github.com/maximhq/bifrost/core v1.2.38
     // Optional: Add framework for advanced features
-    // github.com/maximhq/bifrost/framework v1.1.33
+    // github.com/maximhq/bifrost/framework v1.1.48
     
     // Add other dependencies as needed, matching versions from Bifrost's go.mod files
     // github.com/bytedance/sonic v1.14.1
@@ -560,14 +560,14 @@ cannot load plugin: plugin was built with a different version of package runtime
 
 **Cause:** Plugin and Bifrost were built with different Go versions.
 
-**Solution:** Use the exact same Go version (Go 1.24.3) for both:
+**Solution:** Use the exact same Go version (Go 1.25.5) for both:
 
 ```bash
 # Check Go version used for Bifrost
 ./tmp/bifrost-http -version
 
 # Verify your Go version matches
-go version  # Should output: go version go1.24.3
+go version  # Should output: go version go1.25.5
 
 # See full compatibility requirements
 ```
@@ -639,10 +639,10 @@ Match Bifrost's exact Go version and key dependencies (see [Go Version and Packa
 
 ```bash
 # Pin Go version in Dockerfile
-FROM golang:1.24.3-alpine3.22 AS builder
+FROM golang:1.25.5-alpine3.22 AS builder
 
 # Pin Go version in Makefile/CI
-GO_VERSION=1.24.3
+GO_VERSION=1.25.5
 ```
 
 ### 3. Test Plugin Loading Locally
@@ -674,14 +674,14 @@ Build plugins in the same Dockerfile as Bifrost:
 
 ```dockerfile
 # Build plugin
-FROM golang:1.24.3-alpine3.22 AS plugin-builder
+FROM golang:1.25.5-alpine3.22 AS plugin-builder
 WORKDIR /plugin
 COPY plugins/myplugin/ .
 RUN apk add --no-cache gcc musl-dev && \
     go build -buildmode=plugin -o myplugin.so main.go
 
 # Build Bifrost
-FROM golang:1.24.3-alpine3.22 AS bifrost-builder
+FROM golang:1.25.5-alpine3.22 AS bifrost-builder
 # ... (bifrost build steps)
 
 # Runtime
diff --git a/docs/plugins/writing-plugin.mdx b/docs/plugins/writing-plugin.mdx
index 5565d6e30..8eb71bf5c 100644
--- a/docs/plugins/writing-plugin.mdx
+++ b/docs/plugins/writing-plugin.mdx
@@ -12,12 +12,12 @@ This guide walks you through creating a custom plugin for Bifrost using our [hel
 
 Before you start, ensure you have:
 
-- **Go 1.24+** installed (must match Bifrost's Go version)
+- **Go 1.25.5** installed (must match Bifrost's Go version)
 - **Linux or macOS** (Go plugins are not supported on Windows)
 - **Bifrost** installed and configured
 - Basic understanding of Go programming
 
-<Note>Make sure your go.mod has the go version pinned to 1.24.0</Note>
+<Note>Make sure your go.mod has the go version pinned to 1.25.5</Note>
 
 ## Project Structure
 
@@ -53,9 +53,9 @@ Your `go.mod` should look like this:
 ```go
 module github.com/yourusername/my-plugin
 
-go 1.24.0
+go 1.25.5
 
-require github.com/maximhq/bifrost/core v1.2.17
+require github.com/maximhq/bifrost/core v1.2.38
 ```
 
 ## Step 2: Implement the Plugin Interface
@@ -531,7 +531,7 @@ jobs:
       # 2. Setup Go
       - uses: actions/setup-go@v4
         with:
-          go-version: '1.24'
+          go-version: '1.25.5'
       
       # 3. Build Bifrost
       - name: Build Bifrost
diff --git a/examples/plugins/hello-world/Makefile b/examples/plugins/hello-world/Makefile
index 0337bc5d3..bbfbbba98 100644
--- a/examples/plugins/hello-world/Makefile
+++ b/examples/plugins/hello-world/Makefile
@@ -127,7 +127,7 @@ _build-with-docker: # Internal target for Docker-based cross-compilation
 			-e CGO_ENABLED=1 \
 			-e GOOS=$(TARGET_OS) \
 			-e GOARCH=$(TARGET_ARCH) \
-			golang:1.24.3-alpine3.22 \
+			golang:1.25.5-alpine3.22 \
 			sh -c "apk add --no-cache gcc musl-dev && \
 				go build -buildmode=plugin -ldflags='-w -s' -trimpath -o $(OUTPUT) main.go"; \
 		echo "$(COLOR_SUCCESS)✓ Plugin built successfully: $(OUTPUT) ($(TARGET_OS)/$(TARGET_ARCH))$(COLOR_RESET)"; \
diff --git a/framework/changelog.md b/framework/changelog.md
index e69de29bb..39a30a3ff 100644
--- a/framework/changelog.md
+++ b/framework/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39
\ No newline at end of file
diff --git a/framework/modelcatalog/pricing.go b/framework/modelcatalog/pricing.go
index 49c80b5c5..a51ca4a2c 100644
--- a/framework/modelcatalog/pricing.go
+++ b/framework/modelcatalog/pricing.go
@@ -40,7 +40,15 @@ func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse) float64 {
 	case result.EmbeddingResponse != nil && result.EmbeddingResponse.Usage != nil:
 		usage = result.EmbeddingResponse.Usage
 	case result.SpeechResponse != nil:
-		return 0
+		if result.SpeechResponse.Usage != nil {
+			usage = &schemas.BifrostLLMUsage{
+				PromptTokens:     result.SpeechResponse.Usage.InputTokens,
+				CompletionTokens: result.SpeechResponse.Usage.OutputTokens,
+				TotalTokens:      result.SpeechResponse.Usage.TotalTokens,
+			}
+		} else {
+			return 0
+		}
 	case result.SpeechStreamResponse != nil && result.SpeechStreamResponse.Usage != nil:
 		usage = &schemas.BifrostLLMUsage{
 			PromptTokens:     result.SpeechStreamResponse.Usage.InputTokens,
@@ -65,6 +73,9 @@ func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse) float64 {
 			audioTokenDetails.AudioTokens = result.TranscriptionResponse.Usage.InputTokenDetails.AudioTokens
 			audioTokenDetails.TextTokens = result.TranscriptionResponse.Usage.InputTokenDetails.TextTokens
 		}
+		if result.TranscriptionResponse.Usage.Seconds != nil {
+			audioSeconds = result.TranscriptionResponse.Usage.Seconds
+		}
 	case result.TranscriptionStreamResponse != nil && result.TranscriptionStreamResponse.Usage != nil:
 		usage = &schemas.BifrostLLMUsage{}
 		if result.TranscriptionStreamResponse.Usage.InputTokens != nil {
@@ -83,6 +94,9 @@ func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse) float64 {
 			audioTokenDetails.AudioTokens = result.TranscriptionStreamResponse.Usage.InputTokenDetails.AudioTokens
 			audioTokenDetails.TextTokens = result.TranscriptionStreamResponse.Usage.InputTokenDetails.TextTokens
 		}
+		if result.TranscriptionStreamResponse.Usage.Seconds != nil {
+			audioSeconds = result.TranscriptionStreamResponse.Usage.Seconds
+		}
 	default:
 		return 0
 	}
diff --git a/framework/streaming/chat.go b/framework/streaming/chat.go
index 185a45019..36f3b9717 100644
--- a/framework/streaming/chat.go
+++ b/framework/streaming/chat.go
@@ -103,6 +103,36 @@ func (a *Accumulator) buildCompleteMessageFromChatStreamChunks(chunks []*ChatStr
 				}
 			}
 		}
+		// Handle audio data - accumulate audio data and transcript
+		if chunk.Delta.Audio != nil {
+			if completeMessage.ChatAssistantMessage == nil {
+				completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{}
+			}
+			if completeMessage.ChatAssistantMessage.Audio == nil {
+				// First chunk with audio - initialize
+				completeMessage.ChatAssistantMessage.Audio = &schemas.ChatAudioMessageAudio{
+					ID:         chunk.Delta.Audio.ID,
+					Data:       chunk.Delta.Audio.Data,
+					ExpiresAt:  chunk.Delta.Audio.ExpiresAt,
+					Transcript: chunk.Delta.Audio.Transcript,
+				}
+			} else {
+				// Subsequent chunks - accumulate data and transcript
+				if chunk.Delta.Audio.Data != "" {
+					completeMessage.ChatAssistantMessage.Audio.Data += chunk.Delta.Audio.Data
+				}
+				if chunk.Delta.Audio.Transcript != "" {
+					completeMessage.ChatAssistantMessage.Audio.Transcript += chunk.Delta.Audio.Transcript
+				}
+				// Update ID and ExpiresAt if present (they should be consistent or final)
+				if chunk.Delta.Audio.ID != "" {
+					completeMessage.ChatAssistantMessage.Audio.ID = chunk.Delta.Audio.ID
+				}
+				if chunk.Delta.Audio.ExpiresAt != 0 {
+					completeMessage.ChatAssistantMessage.Audio.ExpiresAt = chunk.Delta.Audio.ExpiresAt
+				}
+			}
+		}
 		// Accumulate tool calls
 		if len(chunk.Delta.ToolCalls) > 0 {
 			a.accumulateToolCallsInMessage(completeMessage, chunk.Delta.ToolCalls)
diff --git a/framework/version b/framework/version
index 3361394de..b1471d3a9 100644
--- a/framework/version
+++ b/framework/version
@@ -1 +1 @@
-1.1.48
\ No newline at end of file
+1.1.49
\ No newline at end of file
diff --git a/helm-charts/bifrost/Chart.yaml b/helm-charts/bifrost/Chart.yaml
index e3533ea0b..2ecb610f1 100644
--- a/helm-charts/bifrost/Chart.yaml
+++ b/helm-charts/bifrost/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
 name: bifrost
 description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers
 type: application
-version: 1.5.0
-appVersion: "1.5.0"
+version: 1.5.1
+appVersion: "1.5.1"
 keywords:
   - ai
   - gateway
diff --git a/helm-charts/index.yaml b/helm-charts/index.yaml
index b1489317e..74c1004d7 100644
--- a/helm-charts/index.yaml
+++ b/helm-charts/index.yaml
@@ -1,6 +1,28 @@
 apiVersion: v1
 entries:
   bifrost:
+  - apiVersion: v2
+    appVersion: 1.5.1
+    created: "2025-12-12T12:00:00.000000+00:00"
+    description: A Helm chart for deploying Bifrost - AI Gateway with unified interface
+      for multiple providers
+    digest: ""
+    home: https://www.getmaxim.ai/bifrost
+    icon: https://www.getmaxim.ai/bifrost/bifrost-logo-only.png
+    keywords:
+    - ai
+    - gateway
+    - llm
+    maintainers:
+    - email: akshay@getmaxim.ai
+      name: Bifrost Team
+    name: bifrost
+    sources:
+    - https://github.com/maximhq/bifrost
+    type: application
+    urls:
+    - https://maximhq.github.io/bifrost/helm-charts/bifrost-1.5.1.tgz
+    version: 1.5.1
   - apiVersion: v2
     appVersion: 1.5.0
     created: "2025-12-11T12:00:00.000000+00:00"
diff --git a/plugins/governance/changelog.md b/plugins/governance/changelog.md
index e69de29bb..5ef197d42 100644
--- a/plugins/governance/changelog.md
+++ b/plugins/governance/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39 and framework version to 1.1.49
\ No newline at end of file
diff --git a/plugins/governance/version b/plugins/governance/version
index 8229aaebc..16e50f0b4 100644
--- a/plugins/governance/version
+++ b/plugins/governance/version
@@ -1 +1 @@
-1.3.49
\ No newline at end of file
+1.3.50
\ No newline at end of file
diff --git a/plugins/jsonparser/changelog.md b/plugins/jsonparser/changelog.md
index e69de29bb..5ef197d42 100644
--- a/plugins/jsonparser/changelog.md
+++ b/plugins/jsonparser/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39 and framework version to 1.1.49
\ No newline at end of file
diff --git a/plugins/jsonparser/version b/plugins/jsonparser/version
index 8229aaebc..16e50f0b4 100644
--- a/plugins/jsonparser/version
+++ b/plugins/jsonparser/version
@@ -1 +1 @@
-1.3.49
\ No newline at end of file
+1.3.50
\ No newline at end of file
diff --git a/plugins/logging/changelog.md b/plugins/logging/changelog.md
index e69de29bb..5ef197d42 100644
--- a/plugins/logging/changelog.md
+++ b/plugins/logging/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39 and framework version to 1.1.49
\ No newline at end of file
diff --git a/plugins/logging/version b/plugins/logging/version
index 8229aaebc..16e50f0b4 100644
--- a/plugins/logging/version
+++ b/plugins/logging/version
@@ -1 +1 @@
-1.3.49
\ No newline at end of file
+1.3.50
\ No newline at end of file
diff --git a/plugins/maxim/changelog.md b/plugins/maxim/changelog.md
index e69de29bb..5ef197d42 100644
--- a/plugins/maxim/changelog.md
+++ b/plugins/maxim/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39 and framework version to 1.1.49
\ No newline at end of file
diff --git a/plugins/maxim/version b/plugins/maxim/version
index 0750769ee..aa8e212a6 100644
--- a/plugins/maxim/version
+++ b/plugins/maxim/version
@@ -1 +1 @@
-1.4.49
\ No newline at end of file
+1.4.50
\ No newline at end of file
diff --git a/plugins/mocker/changelog.md b/plugins/mocker/changelog.md
index e69de29bb..5ef197d42 100644
--- a/plugins/mocker/changelog.md
+++ b/plugins/mocker/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39 and framework version to 1.1.49
\ No newline at end of file
diff --git a/plugins/mocker/version b/plugins/mocker/version
index bba60c093..8229aaebc 100644
--- a/plugins/mocker/version
+++ b/plugins/mocker/version
@@ -1 +1 @@
-1.3.48
\ No newline at end of file
+1.3.49
\ No newline at end of file
diff --git a/plugins/otel/changelog.md b/plugins/otel/changelog.md
index e69de29bb..5ef197d42 100644
--- a/plugins/otel/changelog.md
+++ b/plugins/otel/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39 and framework version to 1.1.49
\ No newline at end of file
diff --git a/plugins/otel/version b/plugins/otel/version
index c30197954..feca5b25f 100644
--- a/plugins/otel/version
+++ b/plugins/otel/version
@@ -1 +1 @@
-1.0.48
\ No newline at end of file
+1.0.49
\ No newline at end of file
diff --git a/plugins/semanticcache/changelog.md b/plugins/semanticcache/changelog.md
index e69de29bb..5ef197d42 100644
--- a/plugins/semanticcache/changelog.md
+++ b/plugins/semanticcache/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39 and framework version to 1.1.49
\ No newline at end of file
diff --git a/plugins/semanticcache/version b/plugins/semanticcache/version
index bba60c093..8229aaebc 100644
--- a/plugins/semanticcache/version
+++ b/plugins/semanticcache/version
@@ -1 +1 @@
-1.3.48
\ No newline at end of file
+1.3.49
\ No newline at end of file
diff --git a/plugins/telemetry/changelog.md b/plugins/telemetry/changelog.md
index e69de29bb..5ef197d42 100644
--- a/plugins/telemetry/changelog.md
+++ b/plugins/telemetry/changelog.md
@@ -0,0 +1 @@
+- chore: update core version to 1.2.39 and framework version to 1.1.49
\ No newline at end of file
diff --git a/plugins/telemetry/version b/plugins/telemetry/version
index bba60c093..8229aaebc 100644
--- a/plugins/telemetry/version
+++ b/plugins/telemetry/version
@@ -1 +1 @@
-1.3.48
\ No newline at end of file
+1.3.49
\ No newline at end of file
diff --git a/transports/Dockerfile b/transports/Dockerfile
index 3e9a1f24c..4224d5fc6 100644
--- a/transports/Dockerfile
+++ b/transports/Dockerfile
@@ -15,7 +15,7 @@ RUN node scripts/fix-paths.js
 # Skip the copy-build step since we'll copy the files in the Go build stage
 
 # --- Go Build Stage: Compile the Go binary ---
-FROM golang:1.24.3-alpine3.22 AS builder
+FROM golang:1.25.5-alpine3.22 AS builder
 WORKDIR /app
 
 # Install dependencies including gcc for CGO and sqlite
diff --git a/transports/changelog.md b/transports/changelog.md
index f5e4406b5..d685d32d0 100644
--- a/transports/changelog.md
+++ b/transports/changelog.md
@@ -6,6 +6,7 @@
 - chore: increased provider-level timeout limit to 48 hours
 - chore: bumped up Go version to 1.25.5
 - docs: updates key management links for integration docs - [@Georgehe4](https://github.com/Georgehe4)
+- chore: update core version to 1.2.39 and framework version to 1.1.49
 - feat: prompt caching support for anthropic and bedrock(claude and nova models)
 - feat: reasoning support for bedrock nova 2 models
 - docs: updated langchain docs for reasoning and embedding
\ No newline at end of file
diff --git a/transports/version b/transports/version
index 8229aaebc..16e50f0b4 100644
--- a/transports/version
+++ b/transports/version
@@ -1 +1 @@
-1.3.49
\ No newline at end of file
+1.3.50
\ No newline at end of file
diff --git a/ui/app/workspace/logs/views/audioPlayer.tsx b/ui/app/workspace/logs/views/audioPlayer.tsx
index 82ab60d38..ec7215df9 100644
--- a/ui/app/workspace/logs/views/audioPlayer.tsx
+++ b/ui/app/workspace/logs/views/audioPlayer.tsx
@@ -2,15 +2,87 @@ import { Button } from "@/components/ui/button";
 import { Pause, Play, Download } from "lucide-react";
 import { useState } from "react";
 
-const AudioPlayer = ({ src }: { src: string }) => {
+interface AudioPlayerProps {
+	src: string; // Base64-encoded audio payload; decoded with atob() before playback or download
+	format?: string; // Optional format: "mp3", "wav", "pcm16", etc. Missing or unrecognized values are treated as MP3.
+}
+
+const AudioPlayer = ({ src, format }: AudioPlayerProps) => {
 	const [isPlaying, setIsPlaying] = useState(false);
 	const [audio] = useState<HTMLAudioElement | null>(typeof window !== "undefined" ? new Audio() : null);
 	const [error, setError] = useState<string | null>(null);
 
-	const createAudioBlob = (base64Data: string): Blob | null => {
+	// Convert PCM16 to WAV format: prepends a 44-byte RIFF/WAVE header to the raw 16-bit PCM bytes (defaults: 24000 Hz, 1 channel)
+	const convertPCM16ToWAV = (pcmData: Uint8Array, sampleRate: number = 24000, numChannels: number = 1): Uint8Array => {
+		const bitsPerSample = 16;
+		const byteRate = (sampleRate * numChannels * bitsPerSample) / 8; // bytes of audio per second
+		const blockAlign = (numChannels * bitsPerSample) / 8; // bytes per sample frame across all channels
+		const dataSize = pcmData.length; // PCM payload size in bytes
+		const fileSize = 36 + dataSize; // RIFF chunk size: 44-byte header + payload, minus the 8 bytes of the "RIFF" tag and this size field
+
+		const wavBuffer = new ArrayBuffer(44 + dataSize);
+		const view = new DataView(wavBuffer);
+
+		// RIFF header (writeString stamps an ASCII tag one byte per character at the given offset)
+		const writeString = (offset: number, string: string) => {
+			for (let i = 0; i < string.length; i++) {
+				view.setUint8(offset + i, string.charCodeAt(i));
+			}
+		};
+
+		writeString(0, "RIFF");
+		view.setUint32(4, fileSize, true); // trailing `true` = little-endian, used for every multi-byte header field
+		writeString(8, "WAVE");
+
+		// fmt subchunk
+		writeString(12, "fmt ");
+		view.setUint32(16, 16, true); // Subchunk1Size
+		view.setUint16(20, 1, true); // AudioFormat (1 = PCM)
+		view.setUint16(22, numChannels, true); // NumChannels
+		view.setUint32(24, sampleRate, true); // SampleRate
+		view.setUint32(28, byteRate, true); // ByteRate
+		view.setUint16(32, blockAlign, true); // BlockAlign
+		view.setUint16(34, bitsPerSample, true); // BitsPerSample
+
+		// data subchunk
+		writeString(36, "data");
+		view.setUint32(40, dataSize, true);
+
+		// Copy PCM data verbatim after the header. NOTE(review): assumes samples are already little-endian 16-bit — confirm per provider
+		const wavArray = new Uint8Array(wavBuffer);
+		wavArray.set(pcmData, 44);
+
+		return wavArray;
+	};
+
+	const createAudioBlob = (base64Data: string, audioFormat?: string): Blob | null => {
 		try {
-			return new Blob([Uint8Array.from(atob(base64Data), (c) => c.charCodeAt(0))], {
-				type: "audio/mpeg",
+			const binaryString = atob(base64Data);
+			const pcmData = Uint8Array.from(binaryString, (c) => c.charCodeAt(0));
+
+			// Handle PCM16 format - convert to WAV
+			if (audioFormat === "pcm16" || audioFormat === "pcm_s16le_16") {
+				const wavData = convertPCM16ToWAV(pcmData);
+				// Create a new ArrayBuffer to ensure proper type
+				const buffer = new ArrayBuffer(wavData.length);
+				new Uint8Array(buffer).set(wavData);
+				return new Blob([buffer], {
+					type: "audio/wav",
+				});
+			}
+
+			// Handle other formats
+			let mimeType = "audio/mpeg"; // Default to MP3
+			if (audioFormat === "wav") {
+				mimeType = "audio/wav";
+			} else if (audioFormat === "ogg") {
+				mimeType = "audio/ogg";
+			} else if (audioFormat === "webm") {
+				mimeType = "audio/webm";
+			}
+
+			return new Blob([pcmData], {
+				type: mimeType,
 			});
 		} catch (err) {
 			console.error("Failed to decode audio data:", err);
@@ -26,7 +98,7 @@ const AudioPlayer = ({ src }: { src: string }) => {
 			audio.pause();
 			setIsPlaying(false);
 		} else {
-			const audioBlob = createAudioBlob(src);
+			const audioBlob = createAudioBlob(src, format);
 			if (!audioBlob) return;
 
 			const audioUrl = URL.createObjectURL(audioBlob);
@@ -48,14 +120,26 @@ const AudioPlayer = ({ src }: { src: string }) => {
 	const handleDownload = () => {
 		if (!src) return;
 
-		const audioBlob = createAudioBlob(src);
+		const audioBlob = createAudioBlob(src, format);
 		if (!audioBlob) return;
 
 		const audioUrl = URL.createObjectURL(audioBlob);
 
+		// Determine file extension based on format
+		let extension = "mp3";
+		if (format === "pcm16" || format === "pcm_s16le_16") {
+			extension = "wav";
+		} else if (format === "wav") {
+			extension = "wav";
+		} else if (format === "ogg") {
+			extension = "ogg";
+		} else if (format === "webm") {
+			extension = "webm";
+		}
+
 		const a = document.createElement("a");
 		a.href = audioUrl;
-		a.download = "speech-output.mp3";
+		a.download = `speech-output.${extension}`;
 		document.body.appendChild(a);
 		a.click();
 		document.body.removeChild(a);
diff --git a/ui/app/workspace/logs/views/filters.tsx b/ui/app/workspace/logs/views/filters.tsx
index 302f0c538..7f5ff0c6b 100644
--- a/ui/app/workspace/logs/views/filters.tsx
+++ b/ui/app/workspace/logs/views/filters.tsx
@@ -324,10 +324,10 @@ export function LogFilters({ filters, onFiltersChange, liveEnabled, onLiveToggle
 						<MoreVertical className="h-4 w-4" />
 					</Button>
 				</PopoverTrigger>
-				<PopoverContent className="w-[250px] bg-white p-2" align="end">
+				<PopoverContent className="bg-accent w-[250px] p-2" align="end">
 					<Command>
 						<CommandList>
-							<CommandItem className="cursor-pointer" onSelect={handleRecalculateCosts}>
+							<CommandItem className="hover:bg-accent/50 cursor-pointer" onSelect={handleRecalculateCosts}>
 								<Calculator className="text-muted-foreground size-4" />
 								<div className="flex flex-col">
 									<span className="text-sm">Recalculate costs</span>
diff --git a/ui/app/workspace/logs/views/logChatMessageView.tsx b/ui/app/workspace/logs/views/logChatMessageView.tsx
index d7424ff3d..5b0c821cf 100644
--- a/ui/app/workspace/logs/views/logChatMessageView.tsx
+++ b/ui/app/workspace/logs/views/logChatMessageView.tsx
@@ -1,9 +1,11 @@
 import { ChatMessage, ContentBlock } from "@/lib/types/logs";
 import { CodeEditor } from "./codeEditor";
 import { isJson, cleanJson } from "@/lib/utils/validation";
+import AudioPlayer from "./audioPlayer";
 
 interface LogChatMessageViewProps {
 	message: ChatMessage;
+	audioFormat?: string; // Optional audio format from request params
 }
 
 const renderContentBlock = (block: ContentBlock, index: number) => {
@@ -62,7 +64,7 @@ const renderContentBlock = (block: ContentBlock, index: number) => {
 	);
 };
 
-export default function LogChatMessageView({ message }: LogChatMessageViewProps) {
+export default function LogChatMessageView({ message, audioFormat }: LogChatMessageViewProps) {
 	return (
 		<div className="w-full rounded-sm border">
 			<div className="border-b px-6 py-2 text-sm font-medium">
@@ -178,6 +180,35 @@ export default function LogChatMessageView({ message }: LogChatMessageViewProps)
 					/>
 				</div>
 			)}
+
+			{/* Handle audio output */}
+			{message.audio && (
+				<div className="border-b last:border-b-0">
+					<div className="bg-muted/50 text-muted-foreground px-6 py-2 text-xs font-medium">Audio Output</div>
+					<div className="space-y-4 px-6 py-4">
+						{message.audio.transcript && (
+							<div className="space-y-2">
+								<div className="text-muted-foreground text-xs font-medium">Transcript:</div>
+								<div className="font-mono text-xs break-words whitespace-pre-wrap">{message.audio.transcript}</div>
+							</div>
+						)}
+						{message.audio.data && (
+							<div className="space-y-2">
+								<div className="text-muted-foreground text-xs font-medium">Audio:</div>
+								<AudioPlayer src={message.audio.data} format={audioFormat} />
+							</div>
+						)}
+						{message.audio.id && (
+							<div className="text-muted-foreground text-xs">
+								ID: {message.audio.id} | Expires:{" "}
+								{message.audio.expires_at && Number.isFinite(message.audio.expires_at)
+									? new Date(message.audio.expires_at * 1000).toLocaleString()
+									: "N/A"}
+							</div>
+						)}
+					</div>
+				</div>
+			)}
 		</div>
 	);
 }
diff --git a/ui/app/workspace/logs/views/logDetailsSheet.tsx b/ui/app/workspace/logs/views/logDetailsSheet.tsx
index 084c22a4b..57f673e5f 100644
--- a/ui/app/workspace/logs/views/logDetailsSheet.tsx
+++ b/ui/app/workspace/logs/views/logDetailsSheet.tsx
@@ -34,6 +34,10 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet
 		} catch (ignored) {}
 	}
 
+	// Extract audio format from request params
+	// Format can be in params.audio?.format or params.extra_params?.audio?.format
+	const audioFormat = (log.params as any)?.audio?.format || (log.params as any)?.extra_params?.audio?.format || undefined;
+
 	return (
 		<Sheet open={open} onOpenChange={onOpenChange}>
 			<SheetContent className="dark:bg-card flex w-full flex-col gap-4 overflow-x-hidden bg-white p-8">
@@ -115,10 +119,22 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet
 							{log.fallback_index > 0 && <LogEntryDetailsView className="w-full" label="Fallback Index" value={log.fallback_index} />}
 							{log.virtual_key && <LogEntryDetailsView className="w-full" label="Virtual Key" value={log.virtual_key.name} />}
 
+							{/* Display audio params if present */}
+							{(log.params as any)?.audio && (
+								<>
+									{(log.params as any).audio.format && (
+										<LogEntryDetailsView className="w-full" label="Audio Format" value={(log.params as any).audio.format} />
+									)}
+									{(log.params as any).audio.voice && (
+										<LogEntryDetailsView className="w-full" label="Audio Voice" value={(log.params as any).audio.voice} />
+									)}
+								</>
+							)}
+
 							{log.params &&
 								Object.keys(log.params).length > 0 &&
 								Object.entries(log.params)
-									.filter(([key]) => key !== "tools" && key !== "instructions")
+									.filter(([key]) => key !== "tools" && key !== "instructions" && key !== "audio")
 									.filter(([_, value]) => typeof value === "boolean" || typeof value === "number" || typeof value === "string")
 									.map(([key, value]) => <LogEntryDetailsView key={key} className="w-full" label={key} value={value} />)}
 						</div>
@@ -360,7 +376,7 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet
 					<>
 						<div className="mt-4 w-full text-left text-sm font-medium">Conversation History</div>
 						{log.input_history.slice(0, -1).map((message, index) => (
-							<LogChatMessageView key={index} message={message} />
+							<LogChatMessageView key={index} message={message} audioFormat={audioFormat} />
 						))}
 					</>
 				)}
@@ -369,7 +385,7 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet
 				{log.input_history && log.input_history.length > 0 && (
 					<>
 						<div className="mt-4 w-full text-left text-sm font-medium">Input</div>
-						<LogChatMessageView message={log.input_history[log.input_history.length - 1]} />
+						<LogChatMessageView message={log.input_history[log.input_history.length - 1]} audioFormat={audioFormat} />
 					</>
 				)}
 
@@ -388,7 +404,7 @@ export function LogDetailSheet({ log, open, onOpenChange, handleDelete }: LogDet
 								<div className="mt-4 flex w-full items-center gap-2">
 									<div className="text-sm font-medium">Response</div>
 								</div>
-								<LogChatMessageView message={log.output_message} />
+								<LogChatMessageView message={log.output_message} audioFormat={audioFormat} />
 							</>
 						)}
 						{log.responses_output && log.responses_output.length > 0 && !log.error_details?.error.message && (
diff --git a/ui/lib/constants/icons.tsx b/ui/lib/constants/icons.tsx
index c85d9cbe5..56b97e9f6 100644
--- a/ui/lib/constants/icons.tsx
+++ b/ui/lib/constants/icons.tsx
@@ -592,52 +592,7 @@ export const ProviderIcons = {
 		);
 	},
 	nebius: ({ size = "md", className = "" }: IconProps) => {
-		const resolvedSize = resolveSize(size);
-		return (
-			<svg
-				width={resolvedSize}
-				height={resolvedSize}
-				style={{ flex: "none", lineHeight: "1" }}
-				viewBox="0 0 264 36"
-				fill="none"
-				xmlns="http://www.w3.org/2000/svg"
-				className={className}
-			>
-				<title>Nebius Token Factory</title>
-				<path
-					d="M121.899 0H9.01006C4.03398 0 0 4.06399 0 9.07702V26.9228C0 31.9358 4.03398 35.9998 9.01006 35.9998H121.9C126.876 35.9998 130.909 31.9358 130.909 26.9228V9.07702C130.909 4.06399 126.875 0 121.9 0H121.899Z"
-					fill="#E0FF4F"
-				/>
-				<path
-					d="M110.878 9.43591C114.575 9.4798 120.338 10.1519 120.673 14.807H116.974C116.646 12.4494 111.421 12.3722 108.751 12.64C108.723 12.6412 105.532 12.7738 105.532 14.3646C105.532 14.3646 105.303 15.8093 108.488 16.1049C108.843 16.1374 113.499 16.5234 113.829 16.5511L114.075 16.5765C116.779 16.8536 118.195 17.0008 119.847 18.3285H119.843L119.842 18.3314C121.542 19.8092 121.815 23.1002 119.228 24.7963C116.882 26.3005 113.946 26.4883 111.237 26.5541C107.261 26.5541 101.566 25.7785 101.572 20.7689H105.229C105.345 21.9466 106.036 22.6532 107.24 22.9984C108.666 23.3966 110.042 23.4472 111.396 23.4564C114.482 23.4564 117.49 22.951 117.296 21.2045C117.119 19.6193 115.011 19.4574 114.015 19.3812H114.014C113.982 19.3789 113.951 19.3767 113.922 19.3744C113.282 19.3247 110.304 19.142 110.304 19.142C108.438 19.0069 107.079 18.8039 106.248 18.6527C104.442 18.3144 103.56 17.6244 102.981 17.1176C101.492 15.7171 101.261 12.5926 103.636 11.0248C105.755 9.64405 108.428 9.49595 110.878 9.43591ZM15.8887 9.44666C17.4911 9.44666 18.6219 10.3341 19.3135 11.9135L23.541 21.6752H23.542C24.6051 24.1247 26.606 23.8653 26.6328 23.8617C26.6328 25.2528 25.54 26.5529 23.9365 26.5531C22.333 26.5531 21.2033 25.6645 20.5117 24.0853L16.2822 14.3246C15.2216 11.8792 13.2241 12.1339 13.1924 12.1381C13.1924 10.7469 14.2863 9.44681 15.8887 9.44666ZM84.8525 9.68201V18.5267C84.8525 21.6128 86.7839 23.4388 90.1123 23.4388C93.4409 23.4388 95.4004 21.6128 95.4004 18.5267V9.68201H98.7559V18.474C98.7558 23.3588 95.3193 26.5521 90.1123 26.5521C84.9055 26.552 81.4971 23.3322 81.4971 18.474V9.68201H84.8525ZM63.9658 9.68201C66.8545 9.69932 70.5586 10.3541 70.5586 14.2172C70.5586 15.9131 69.8065 17.0595 68.9014 17.6576L68.9004 17.6586C69.8032 17.9287 71.5977 19.2013 71.5977 21.6039C71.5976 24.4025 69.9704 26.3119 64.9805 26.3119V26.306H54.166V9.68201H63.9658ZM13.1904 26.306H9.83203V16.1996C9.83203 12.3678 13.1466 12.1409 13.1904 12.1381V26.306ZM50.709 
12.8256H36.8301V16.4086H49.2031V19.5521H36.8301V23.1615H50.709V26.306H33.4453V9.68201H50.709V12.8256ZM78.0381 26.306H74.6553V9.68201H78.0381V26.306ZM29.9912 19.8002C29.9912 23.6544 26.6393 23.8612 26.6318 23.8617V9.69373H29.9912V19.8002ZM57.5488 23.1615L65.0273 23.1605H65.0283C66.5927 23.1594 68.1006 22.9068 68.1006 21.3217C68.1002 19.7369 66.4364 19.4359 64.9795 19.4359V19.4261H57.5488V23.1615ZM57.5498 16.4799H65.0283C65.9449 16.4717 67.3103 16.1763 67.3105 14.7728C67.3105 13.369 66.1182 12.8227 64.5654 12.8226H57.5498V16.4799Z"
-					fill="#052B42"
-				/>
-				<path
-					d="M159.926 0H141.82C136.878 0 132.873 4.06399 132.873 9.07702V26.9228C132.873 31.9358 136.879 35.9998 141.82 35.9998H159.925C164.867 35.9998 168.872 31.9358 168.872 26.9228V9.07702C168.872 4.06399 164.865 0 159.925 0H159.926Z"
-					fill="#052B42"
-				/>
-				<path
-					d="M154.03 15.8648C153.755 15.8648 153.531 15.6408 153.531 15.3649V12.2938C153.531 11.907 153.218 11.5941 152.831 11.5941H149.759C149.483 11.5941 149.26 11.3702 149.26 11.0942V8.02314C149.26 7.63637 148.947 7.32349 148.56 7.32349H145.689C145.302 7.32349 144.989 7.63637 144.989 8.02314V10.8945C144.989 11.2813 145.302 11.5941 145.689 11.5941H148.76C149.036 11.5941 149.26 11.8181 149.26 12.0941V15.1651C149.26 15.5519 149.573 15.8648 149.96 15.8648H153.032C153.308 15.8648 153.531 16.0888 153.531 16.3647V19.6355C153.531 19.9115 153.308 20.1343 153.032 20.1355H149.96C149.573 20.1355 149.26 20.4483 149.26 20.8351V23.9062C149.26 24.1821 149.036 24.4061 148.76 24.4061H145.689C145.302 24.4061 144.989 24.719 144.989 25.1058V27.9771C144.989 28.3639 145.302 28.6768 145.689 28.6768H148.56C148.947 28.6768 149.26 28.3639 149.26 27.9771V24.906C149.26 24.6301 149.483 24.4073 149.759 24.4061H152.831C153.218 24.4061 153.531 24.0932 153.531 23.7065V20.6354C153.531 20.3594 153.755 20.1355 154.03 20.1355H157.102C157.488 20.1355 157.801 19.8226 157.801 19.4358V16.5645C157.801 16.1777 157.488 15.8648 157.102 15.8648H154.03Z"
-					fill="#E0FF4F"
-				/>
-				<path
-					d="M254.834 0.00012207H179.782C174.84 0.00012207 170.835 4.06412 170.835 9.07714V26.9229C170.835 31.936 174.841 36 179.782 36H254.833C259.774 36 263.779 31.936 263.779 26.9229V9.07714C263.779 4.06412 259.773 0.00012207 254.833 0.00012207H254.834Z"
-					fill="#052B42"
-				/>
-				<path
-					d="M227.362 19.9957C230.378 19.9958 232.306 21.6904 232.306 24.3307V24.4733C232.306 27.1135 230.378 28.7955 227.362 28.7955C224.347 28.7955 222.405 27.1135 222.405 24.4733V24.3307C222.405 21.6904 224.347 19.9957 227.362 19.9957ZM206.459 20.0153C210.592 20.0154 211.297 22.3729 211.395 22.8297C211.399 22.855 211.405 22.8795 211.408 22.9059L209.607 23.0309C209.606 23.0237 209.318 21.7931 207.101 21.5905C206.88 21.5674 206.665 21.5577 206.459 21.5612C204.426 21.6131 203.306 22.5748 203.306 24.2867V24.5123C203.306 26.2418 204.397 27.1756 206.459 27.2252C206.57 27.2298 206.683 27.234 206.802 27.234C209.456 27.2339 209.742 25.6611 209.745 25.6461L211.548 25.7369C211.548 25.7369 211.398 28.7769 206.752 28.777L206.751 28.778C206.651 28.778 206.556 28.7744 206.459 28.7721C203.529 28.7086 201.642 27.0419 201.642 24.4811V24.318C201.642 21.7505 203.528 20.0788 206.459 20.0153ZM200.784 28.6549H199.016L198.136 26.7096H193.633L192.753 28.6549H190.979L194.992 20.1442H196.776L200.784 28.6549ZM239.591 20.1412C241.563 20.1412 242.756 21.1665 242.756 22.8268V22.9821C242.756 24.3571 241.796 25.3299 240.148 25.5123L242.927 28.651H240.85L238.074 25.5387H235.895V28.651H234.273V20.1412H239.591ZM190.606 21.6071H183.773V23.8151H189.838V25.2653H183.773V28.65H182.155V20.1569H190.606V21.6071ZM221.268 21.5914H217.835V28.65H216.115V21.5914H212.683V20.1412H221.268V21.5914ZM248.498 23.7897L251.345 20.1403H253.35L249.312 25.316V28.65H247.695V25.316L243.646 20.1403H245.65L248.498 23.7897ZM227.362 21.4957C225.253 21.4957 224.049 22.5437 224.049 24.317V24.4987C224.049 26.2718 225.252 27.2936 227.362 27.2936C229.473 27.2935 230.662 26.2706 230.662 24.4987V24.317C230.662 22.5437 229.472 21.4958 227.362 21.4957ZM194.306 25.2555H197.463L195.884 21.7789L194.306 25.2555ZM235.896 24.0963H239.553C240.551 24.0963 241.109 23.6682 241.109 22.9166V22.7604C241.109 22.0078 240.551 21.5934 239.553 21.5934H235.896V24.0963ZM196.083 7.19299C199.104 7.19307 201.034 8.89121 201.034 11.5348V11.6783C201.034 14.3222 
199.103 16.0083 196.083 16.0084C193.063 16.0084 191.117 14.3222 191.117 11.6783V11.5348C191.117 8.89 193.062 7.19299 196.083 7.19299ZM189.829 8.78284H186.392V15.8531H184.669V8.78284H181.23V7.33069H189.829V8.78284ZM222.742 8.79846H215.897V10.861H221.973V12.3131H215.897V14.401H222.742V15.8531H214.276V7.34631H222.742V8.79846ZM204.7 7.34631V10.8649H207.081L209.907 7.34631H212.03L208.524 11.5934L212.03 15.8512H209.907L207.079 12.3141H204.7V15.8512H203.08V7.34631H204.7ZM232.467 13.486V7.34631H234.088V15.8512H232.442L226.659 9.71643V15.8512H225.04V7.34631H226.684L232.467 13.486ZM196.083 8.69592C193.97 8.69592 192.765 9.74554 192.765 11.5221V11.7037C192.765 13.4794 193.97 14.5035 196.083 14.5035C198.196 14.5035 199.389 13.4794 199.389 11.7037V11.5221C199.388 9.74675 198.196 8.69599 196.083 8.69592Z"
-					fill="url(#paint0_linear_95_109)"
-				/>
-				<defs>
-					<linearGradient id="paint0_linear_95_109" x1="255.599" y1="4.90912" x2="181.308" y2="21.9274" gradientUnits="userSpaceOnUse">
-						<stop stop-color="#9E9DFF" />
-						<stop offset="0.351889" stop-color="#31FCF6" />
-						<stop offset="0.705275" stop-color="#8DFF62" />
-						<stop offset="1" stop-color="#E0FF4F" />
-					</linearGradient>
-				</defs>
-			</svg>
-		);
+		return <img src="/images/nebius.jpeg" alt="Nebius" width={resolveSize(size)} height={resolveSize(size)} style={{ flex: "none", objectFit: "contain" }} className={className} />; // honor `size` via resolveSize, as the replaced SVG did; objectFit keeps the logo's aspect ratio
 	},
 } as const;
 
diff --git a/ui/lib/types/logs.ts b/ui/lib/types/logs.ts
index e73a6469b..35d9a0b6d 100644
--- a/ui/lib/types/logs.ts
+++ b/ui/lib/types/logs.ts
@@ -114,6 +114,14 @@ export interface ChatMessage {
 	tool_calls?: ToolCall[]; // For backward compatibility, tool calls are now in the content
 	reasoning?: string;
 	reasoning_details?: ReasoningDetails[];
+	audio?: ChatAudioMessageAudio;
+}
+
+export interface ChatAudioMessageAudio {
+	id: string; // Audio identifier; the log view only renders the ID/expiry row when this is non-empty
+	data: string; // Base64-encoded audio bytes; passed to AudioPlayer as `src`
+	expires_at: number; // Unix timestamp in seconds (the log view multiplies by 1000 to build a Date)
+	transcript: string; // Text transcript of the audio
 }
 
 export interface ReasoningDetails {