Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion core/internal/testutil/account.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ func (account *ComprehensiveTestAccount) GetConfigForProvider(providerKey schema
case schemas.Gemini:
return &schemas.ProviderConfig{
NetworkConfig: schemas.NetworkConfig{
DefaultRequestTimeoutInSeconds: 120,
DefaultRequestTimeoutInSeconds: 300,
MaxRetries: 10, // Gemini can be variable
RetryBackoffInitial: 750 * time.Millisecond,
RetryBackoffMax: 12 * time.Second,
Expand Down
10 changes: 6 additions & 4 deletions core/internal/testutil/transcription.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,21 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con
name: "RoundTrip_Basic_MP3",
text: TTSTestTextBasic,
voiceType: "primary",
format: "mp3",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
{
name: "RoundTrip_Medium_MP3",
text: TTSTestTextMedium,
voiceType: "secondary",
format: "mp3",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
{
name: "RoundTrip_Technical_MP3",
text: TTSTestTextTechnical,
voiceType: "tertiary",
format: "mp3",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
Comment on lines 34 to 53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Test names misleading: contain "_MP3" but format is "wav".

The test case names reference "MP3" (lines 34, 41, 48) but the format field is now "wav" (lines 37, 44, 51). This creates confusion about what format is actually being tested.

Update the test names to reflect the actual format:

 		{
-			name:           "RoundTrip_Basic_MP3",
+			name:           "RoundTrip_Basic_WAV",
 			text:           TTSTestTextBasic,
 			voiceType:      "primary",
 			format:         "wav",
 			responseFormat: bifrost.Ptr("json"),
 		},
 		{
-			name:           "RoundTrip_Medium_MP3",
+			name:           "RoundTrip_Medium_WAV",
 			text:           TTSTestTextMedium,
 			voiceType:      "secondary",
 			format:         "wav",
 			responseFormat: bifrost.Ptr("json"),
 		},
 		{
-			name:           "RoundTrip_Technical_MP3",
+			name:           "RoundTrip_Technical_WAV",
 			text:           TTSTestTextTechnical,
 			voiceType:      "tertiary",
 			format:         "wav",
 			responseFormat: bifrost.Ptr("json"),
 		},
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
name: "RoundTrip_Basic_MP3",
text: TTSTestTextBasic,
voiceType: "primary",
format: "mp3",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
{
name: "RoundTrip_Medium_MP3",
text: TTSTestTextMedium,
voiceType: "secondary",
format: "mp3",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
{
name: "RoundTrip_Technical_MP3",
text: TTSTestTextTechnical,
voiceType: "tertiary",
format: "mp3",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
{
name: "RoundTrip_Basic_WAV",
text: TTSTestTextBasic,
voiceType: "primary",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
{
name: "RoundTrip_Medium_WAV",
text: TTSTestTextMedium,
voiceType: "secondary",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
{
name: "RoundTrip_Technical_WAV",
text: TTSTestTextTechnical,
voiceType: "tertiary",
format: "wav",
responseFormat: bifrost.Ptr("json"),
},
🤖 Prompt for AI Agents
In core/internal/testutil/transcription.go around lines 34 to 53, the test case
names incorrectly include "_MP3" while the format field is "wav"; update the
name fields for each case to use "_WAV" (e.g., "RoundTrip_Basic_WAV",
"RoundTrip_Medium_WAV", "RoundTrip_Technical_WAV") so the test names accurately
reflect the format being tested and keep naming consistent with the format
value.

}
Expand All @@ -61,6 +61,8 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con

// Step 1: Generate TTS audio
voice := GetProviderVoice(testConfig.Provider, tc.voiceType)
responseFormat := GetProviderResponseFormat(testConfig.Provider, tc.format)

ttsRequest := &schemas.BifrostSpeechRequest{
Provider: testConfig.Provider,
Model: testConfig.SpeechSynthesisModel,
Expand All @@ -71,7 +73,7 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con
VoiceConfig: &schemas.SpeechVoiceInput{
Voice: &voice,
},
ResponseFormat: tc.format,
ResponseFormat: responseFormat,
},
Fallbacks: testConfig.TranscriptionFallbacks,
}
Expand Down
4 changes: 2 additions & 2 deletions core/internal/testutil/transcription_stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"testing"
"time"


bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
)
Expand Down Expand Up @@ -65,6 +64,7 @@ func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx conte

// Step 1: Generate TTS audio
voice := GetProviderVoice(testConfig.Provider, tc.voiceType)
responseFormat := GetProviderResponseFormat(testConfig.Provider, tc.format)
ttsRequest := &schemas.BifrostSpeechRequest{
Provider: testConfig.Provider,
Model: testConfig.SpeechSynthesisModel,
Expand All @@ -75,7 +75,7 @@ func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx conte
VoiceConfig: &schemas.SpeechVoiceInput{
Voice: &voice,
},
ResponseFormat: tc.format,
ResponseFormat: responseFormat,
},
Fallbacks: testConfig.TranscriptionFallbacks,
}
Expand Down
18 changes: 17 additions & 1 deletion core/internal/testutil/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,19 @@ func GetProviderVoice(provider schemas.ModelProvider, voiceType string) string {
}
}

// GetProviderResponseFormat resolves the speech synthesis response format to
// send to the given provider.
//
// Gemini only supports "wav" output for speech synthesis, so requestedFormat
// is ignored for it and "wav" is returned unconditionally. For every other
// provider, requestedFormat is passed through unchanged.
func GetProviderResponseFormat(provider schemas.ModelProvider, requestedFormat string) string {
	// Gemini is the single special case: force wav regardless of the request.
	if provider == schemas.Gemini {
		return "wav"
	}

	// All remaining providers are assumed to honor the requested format as-is.
	return requestedFormat
}

type SampleToolType string

const (
Expand Down Expand Up @@ -539,6 +552,9 @@ func GenerateTTSAudioForTest(ctx context.Context, t *testing.T, client *bifrost.
format = "mp3"
}

// Get the appropriate response format for the provider
responseFormat := GetProviderResponseFormat(provider, format)

req := &schemas.BifrostSpeechRequest{
Provider: provider,
Model: ttsModel,
Expand All @@ -547,7 +563,7 @@ func GenerateTTSAudioForTest(ctx context.Context, t *testing.T, client *bifrost.
VoiceConfig: &schemas.SpeechVoiceInput{
Voice: &voice,
},
ResponseFormat: format,
ResponseFormat: responseFormat,
},
}

Expand Down
4 changes: 2 additions & 2 deletions core/providers/gemini/gemini_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ func TestGemini(t *testing.T) {
MultipleImages: false,
CompleteEnd2End: true,
Embedding: true,
Transcription: false,
TranscriptionStream: false,
Transcription: true,
TranscriptionStream: true,
SpeechSynthesis: true,
SpeechSynthesisStream: true,
Reasoning: true,
Expand Down
17 changes: 3 additions & 14 deletions core/providers/gemini/transcription.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ func (request *GeminiGenerationRequest) ToBifrostTranscriptionRequest() *schemas
bifrostReq := &schemas.BifrostTranscriptionRequest{
Provider: provider,
Model: model,
Params: &schemas.TranscriptionParameters{
ExtraParams: make(map[string]interface{}),
},
}

// Extract audio data and prompt from contents
Expand Down Expand Up @@ -60,37 +63,23 @@ func (request *GeminiGenerationRequest) ToBifrostTranscriptionRequest() *schemas
File: audioData,
}

// Set parameters
if bifrostReq.Params == nil {
bifrostReq.Params = &schemas.TranscriptionParameters{}
}

// Set prompt if provided
if promptText != "" {
bifrostReq.Params.Prompt = &promptText
}

// Handle safety settings from request
if len(request.SafetySettings) > 0 {
if bifrostReq.Params.ExtraParams == nil {
bifrostReq.Params.ExtraParams = make(map[string]interface{})
}
bifrostReq.Params.ExtraParams["safety_settings"] = request.SafetySettings
}

// Handle cached content
if request.CachedContent != "" {
if bifrostReq.Params.ExtraParams == nil {
bifrostReq.Params.ExtraParams = make(map[string]interface{})
}
bifrostReq.Params.ExtraParams["cached_content"] = request.CachedContent
}

// Handle labels
if len(request.Labels) > 0 {
if bifrostReq.Params.ExtraParams == nil {
bifrostReq.Params.ExtraParams = make(map[string]interface{})
}
bifrostReq.Params.ExtraParams["labels"] = request.Labels
}

Expand Down