diff --git a/core/changelog.md b/core/changelog.md index a82103a25..aa03f339c 100644 --- a/core/changelog.md +++ b/core/changelog.md @@ -1,4 +1,6 @@ feat: send back raw request in extra fields feat: added support for reasoning in chat completions feat: enhanced reasoning support in responses api -enhancement: improved internal inter provider conversions for integrations \ No newline at end of file +enhancement: improved internal inter provider conversions for integrations +feat: switched to gemini native api +feat: fallback to supported request type for custom models used in integration \ No newline at end of file diff --git a/core/providers/cohere/chat.go b/core/providers/cohere/chat.go index 1d2391b10..3c0caf5ce 100644 --- a/core/providers/cohere/chat.go +++ b/core/providers/cohere/chat.go @@ -690,5 +690,11 @@ func (cm *CohereMessage) ToBifrostChatMessage() *schemas.ChatMessage { Content: messageContent, ChatAssistantMessage: assistantMessage, } + + if cm.Role == "tool" { + bifrostMessage.ChatToolMessage = &schemas.ChatToolMessage{ + ToolCallID: cm.ToolCallID, + } + } return bifrostMessage } diff --git a/core/providers/gemini/chat.go b/core/providers/gemini/chat.go index 81cac9249..458e1b67e 100644 --- a/core/providers/gemini/chat.go +++ b/core/providers/gemini/chat.go @@ -187,8 +187,8 @@ func (response *GenerateContentResponse) ToBifrostChatCompletionStream() (*schem candidate := response.Candidates[0] - // Determine if this is the last chunk based on finish reason or usage metadata - isLastChunk := candidate.FinishReason != "" && candidate.FinishReason != FinishReasonUnspecified + // Determine if this is the last chunk based on finish reason and usage metadata + isLastChunk := candidate.FinishReason != "" && response.UsageMetadata != nil // Create the streaming response streamResponse := &schemas.BifrostChatResponse{ @@ -256,15 +256,6 @@ func (response *GenerateContentResponse) ToBifrostChatCompletionStream() (*schem }, } - // Preserve thought signature if present (required for Gemini 3 Pro) - if len(part.ThoughtSignature) > 0 { - toolCall.ExtraContent = map[string]interface{}{ - "google": map[string]interface{}{ - "thought_signature": string(part.ThoughtSignature), - }, - } - } - toolCalls = append(toolCalls, toolCall) case part.FunctionResponse != nil: diff --git a/core/providers/gemini/gemini.go b/core/providers/gemini/gemini.go index f18ab271a..fb293a9e8 100644 --- a/core/providers/gemini/gemini.go +++ b/core/providers/gemini/gemini.go @@ -243,6 +243,12 @@ func (provider *GeminiProvider) ChatCompletion(ctx context.Context, key schemas. 
bifrostResponse.ExtraFields.ModelRequested = request.Model bifrostResponse.ExtraFields.Latency = latency.Milliseconds() + // Set raw request if enabled + if providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest) { + providerUtils.ParseAndSetRawRequest(&bifrostResponse.ExtraFields, jsonData) + } + + // Set raw response if enabled if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) { bifrostResponse.ExtraFields.RawResponse = rawResponse } @@ -291,6 +297,7 @@ func (provider *GeminiProvider) ChatCompletionStream(ctx context.Context, postHo jsonData, headers, provider.networkConfig.ExtraHeaders, + providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), provider.GetProviderKey(), request.Model, @@ -308,6 +315,7 @@ func HandleGeminiChatCompletionStream( jsonBody []byte, headers map[string]string, extraHeaders map[string]string, + sendBackRawRequest bool, sendBackRawResponse bool, providerName schemas.ModelProvider, model string, @@ -378,6 +386,11 @@ func HandleGeminiChatCompletionStream( var modelName string for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } line := scanner.Text() // Skip empty lines and comments @@ -473,6 +486,9 @@ func HandleGeminiChatCompletionStream( chunkIndex++ if isLastChunk { + if sendBackRawRequest { + providerUtils.ParseAndSetRawRequest(&response.ExtraFields, jsonBody) + } response.ExtraFields.Latency = time.Since(startTime).Milliseconds() ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true) providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, response, nil, nil, nil), responseChan) @@ -584,6 +600,7 @@ func (provider *GeminiProvider) ResponsesStream(ctx context.Context, postHookRun jsonData, headers, provider.networkConfig.ExtraHeaders, + providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), provider.GetProviderKey(), request.Model, @@ -601,6 +618,7 @@ func HandleGeminiResponsesStream( jsonBody []byte, headers map[string]string, extraHeaders map[string]string, + sendBackRawRequest bool, sendBackRawResponse bool, providerName schemas.ModelProvider, model string, @@ -675,6 +693,11 @@ func HandleGeminiResponsesStream( var lastUsageMetadata *GenerateContentResponseUsageMetadata for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } line := scanner.Text() // Skip empty lines and comments @@ -771,6 +794,9 @@ func HandleGeminiResponsesStream( } if isLastChunk { + if sendBackRawRequest { + providerUtils.ParseAndSetRawRequest(&response.ExtraFields, jsonBody) + } response.ExtraFields.Latency = time.Since(startTime).Milliseconds() ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true) providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, response, nil, nil), responseChan) @@ -985,6 +1011,11 @@ func (provider *GeminiProvider) SpeechStream(ctx context.Context, postHookRunner lastChunkTime := startTime for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } line := scanner.Text() // Skip empty lines @@ -1243,6 +1274,11 @@ func (provider *GeminiProvider) TranscriptionStream(ctx context.Context, postHoo var fullTranscriptionText string for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } line 
:= scanner.Text() // Skip empty lines diff --git a/core/providers/gemini/responses.go b/core/providers/gemini/responses.go index 72031a8cb..acf175bb9 100644 --- a/core/providers/gemini/responses.go +++ b/core/providers/gemini/responses.go @@ -2,7 +2,6 @@ package gemini import ( "encoding/base64" - "encoding/json" "fmt" "strings" "sync" @@ -176,13 +175,8 @@ func ToGeminiResponsesResponse(bifrostResp *schemas.BifrostResponsesResponse) *G // Determine the role role := "model" // default if msg.Role != nil { - switch *msg.Role { - case schemas.ResponsesInputMessageRoleAssistant: - role = "model" - case schemas.ResponsesInputMessageRoleUser: + if *msg.Role == schemas.ResponsesInputMessageRoleUser { role = "user" - default: - role = "model" } } @@ -240,7 +234,10 @@ func ToGeminiResponsesResponse(bifrostResp *schemas.BifrostResponsesResponse) *G // Check for thought signature in reasoning message if msg.ResponsesReasoning != nil && msg.ResponsesReasoning.EncryptedContent != nil { - part.ThoughtSignature = []byte(*msg.ResponsesReasoning.EncryptedContent) + decodedSig, err := base64.StdEncoding.DecodeString(*msg.ResponsesReasoning.EncryptedContent) + if err == nil { + part.ThoughtSignature = decodedSig + } } currentParts = append(currentParts, part) @@ -289,9 +286,12 @@ func ToGeminiResponsesResponse(bifrostResp *schemas.BifrostResponsesResponse) *G } } if msg.ResponsesReasoning.EncryptedContent != nil { - currentParts = append(currentParts, &Part{ - ThoughtSignature: []byte(*msg.ResponsesReasoning.EncryptedContent), - }) + decodedSig, err := base64.StdEncoding.DecodeString(*msg.ResponsesReasoning.EncryptedContent) + if err == nil { + currentParts = append(currentParts, &Part{ + ThoughtSignature: decodedSig, + }) + } } } } @@ -526,7 +526,8 @@ type GeminiResponsesStreamState struct { HasEmittedCompleted bool // Whether response.completed has been sent // Item tracking - CurrentOutputIndex int // Current output index + CurrentOutputIndex int // Current output index counter + TextOutputIndex int // Output index of the current text item (cached for reuse) ItemIDs map[int]string // Maps output_index to item ID TextItemClosed bool // Whether text item has been closed @@ -542,8 +543,9 @@ type GeminiResponsesStreamState struct { ResponseID *string // Gemini's responseId // Content tracking - HasStartedText bool // Whether we've started text content - HasStartedToolCall bool // Whether we've started a tool call + HasStartedText bool // Whether we've started text content + HasStartedToolCall bool // Whether we've started a tool call + TextBuffer strings.Builder // Accumulates text deltas for output_text.done } // geminiResponsesStreamStatePool provides a pool for Gemini responses stream state objects. @@ -555,6 +557,7 @@ var geminiResponsesStreamStatePool = sync.Pool{ ToolCallNames: make(map[int]string), ToolArgumentBuffers: make(map[int]string), CurrentOutputIndex: 0, + TextOutputIndex: -1, CreatedAt: int(time.Now().Unix()), HasEmittedCreated: false, HasEmittedInProgress: false, @@ -569,39 +572,7 @@ var geminiResponsesStreamStatePool = sync.Pool{ // acquireGeminiResponsesStreamState gets a Gemini responses stream state from the pool. 
func acquireGeminiResponsesStreamState() *GeminiResponsesStreamState { state := geminiResponsesStreamStatePool.Get().(*GeminiResponsesStreamState) - // Clear maps - if state.ItemIDs == nil { - state.ItemIDs = make(map[int]string) - } else { - clear(state.ItemIDs) - } - if state.ToolCallIDs == nil { - state.ToolCallIDs = make(map[int]string) - } else { - clear(state.ToolCallIDs) - } - if state.ToolCallNames == nil { - state.ToolCallNames = make(map[int]string) - } else { - clear(state.ToolCallNames) - } - if state.ToolArgumentBuffers == nil { - state.ToolArgumentBuffers = make(map[int]string) - } else { - clear(state.ToolArgumentBuffers) - } - // Reset other fields - state.CurrentOutputIndex = 0 - state.MessageID = nil - state.Model = nil - state.ResponseID = nil - state.CreatedAt = int(time.Now().Unix()) - state.HasEmittedCreated = false - state.HasEmittedInProgress = false - state.HasEmittedCompleted = false - state.TextItemClosed = false - state.HasStartedText = false - state.HasStartedToolCall = false + state.flush() return state } @@ -636,6 +607,7 @@ func (state *GeminiResponsesStreamState) flush() { clear(state.ToolArgumentBuffers) } state.CurrentOutputIndex = 0 + state.TextOutputIndex = -1 state.MessageID = nil state.Model = nil state.ResponseID = nil @@ -646,6 +618,32 @@ func (state *GeminiResponsesStreamState) flush() { state.TextItemClosed = false state.HasStartedText = false state.HasStartedToolCall = false + state.TextBuffer.Reset() +} + +// closeTextItemIfOpen closes the text item if it's open and returns the responses. +// Returns nil if no text item was open. +func (state *GeminiResponsesStreamState) closeTextItemIfOpen(sequenceNumber int) []*schemas.BifrostResponsesStreamResponse { + if state.HasStartedText && !state.TextItemClosed { + return closeGeminiTextItem(state, sequenceNumber) + } + return nil +} + +// nextOutputIndex returns the current output index and increments it for the next use. +func (state *GeminiResponsesStreamState) nextOutputIndex() int { + index := state.CurrentOutputIndex + state.CurrentOutputIndex++ + return index +} + +// generateItemID creates a unique item ID with the given suffix. +// Falls back to index-based ID if MessageID is nil. +func (state *GeminiResponsesStreamState) generateItemID(suffix string, outputIndex int) string { + if state.MessageID != nil { + return fmt.Sprintf("msg_%s_%s_%d", *state.MessageID, suffix, outputIndex) + } + return fmt.Sprintf("%s_%d", suffix, outputIndex) } // ToBifrostResponsesStream converts a Gemini stream event to Bifrost Responses Stream responses @@ -727,21 +725,21 @@ func processGeminiPart(part *Part, state *GeminiResponsesStreamState, sequenceNu var responses []*schemas.BifrostResponsesStreamResponse switch { + case part.Thought && part.Text != "": + // Reasoning/thinking content + responses = append(responses, processGeminiThoughtPart(part, state, sequenceNumber)...) case part.Text != "" && !part.Thought: // Regular text content responses = append(responses, processGeminiTextPart(part, state, sequenceNumber)...) - case part.Thought && part.Text != "": - // Reasoning/thinking content - responses = append(responses, processGeminiThoughtPart(part, state, sequenceNumber)...) + case part.FunctionCall != nil: + // Function call + responses = append(responses, processGeminiFunctionCallPart(part, state, sequenceNumber)...) case part.ThoughtSignature != nil: // Encrypted reasoning content (thoughtSignature) responses = append(responses, processGeminiThoughtSignaturePart(part, state, sequenceNumber)...) 
- case part.FunctionCall != nil: - // Function call - responses = append(responses, processGeminiFunctionCallPart(part, state, sequenceNumber)...) case part.FunctionResponse != nil: // Function response (tool result) responses = append(responses, processGeminiFunctionResponsePart(part, state, sequenceNumber)...) @@ -760,12 +758,12 @@ func processGeminiPart(part *Part, state *GeminiResponsesStreamState, sequenceNu func processGeminiTextPart(part *Part, state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse { var responses []*schemas.BifrostResponsesStreamResponse + var outputIndex int // If this is the first text, emit output_item.added and content_part.added if !state.HasStartedText { - outputIndex := 0 - state.CurrentOutputIndex = outputIndex - - itemID := fmt.Sprintf("msg_%s_item_%d", *state.MessageID, outputIndex) + outputIndex = state.nextOutputIndex() + state.TextOutputIndex = outputIndex // Cache the text item's output index + itemID := state.generateItemID("item", outputIndex) state.ItemIDs[outputIndex] = itemID // Emit output_item.added @@ -799,14 +797,20 @@ func processGeminiTextPart(part *Part, state *GeminiResponsesStreamState, sequen }) state.HasStartedText = true + } else { + // Text already started, reuse the cached text item's output index + outputIndex = state.TextOutputIndex } // Emit output_text.delta for the text content if part.Text != "" { - outputIndex := 0 itemID := state.ItemIDs[outputIndex] contentIndex := 0 text := part.Text + + // Accumulate text for output_text.done + state.TextBuffer.WriteString(text) + responses = append(responses, &schemas.BifrostResponsesStreamResponse{ Type: schemas.ResponsesStreamResponseTypeOutputTextDelta, SequenceNumber: sequenceNumber + len(responses), @@ -825,19 +829,13 @@ func processGeminiThoughtPart(part *Part, state *GeminiResponsesStreamState, seq var responses []*schemas.BifrostResponsesStreamResponse // Close text item if open - if state.HasStartedText && !state.TextItemClosed { - responses = append(responses, closeGeminiTextItem(state, sequenceNumber)...) + if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil { + responses = append(responses, closeResponses...) } // For Gemini thoughts/reasoning, we emit them as reasoning summary text deltas - // Initialize reasoning item if not already done - outputIndex := state.CurrentOutputIndex + 1 - if !state.HasStartedText { - outputIndex = 1 - } - state.CurrentOutputIndex = outputIndex - - itemID := fmt.Sprintf("msg_%s_reasoning_%d", *state.MessageID, outputIndex) + outputIndex := state.nextOutputIndex() + itemID := state.generateItemID("reasoning", outputIndex) state.ItemIDs[outputIndex] = itemID // Emit output_item.added for reasoning @@ -910,18 +908,13 @@ func processGeminiThoughtSignaturePart(part *Part, state *GeminiResponsesStreamS var responses []*schemas.BifrostResponsesStreamResponse // Close text item if open - if state.HasStartedText && !state.TextItemClosed { - responses = append(responses, closeGeminiTextItem(state, sequenceNumber)...) + if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil { + responses = append(responses, closeResponses...) 
} // Create a new reasoning item for the thought signature - outputIndex := state.CurrentOutputIndex + 1 - if !state.HasStartedText { - outputIndex = 1 - } - state.CurrentOutputIndex = outputIndex - - itemID := fmt.Sprintf("msg_%s_reasoning_%d", *state.MessageID, outputIndex) + outputIndex := state.nextOutputIndex() + itemID := state.generateItemID("reasoning", outputIndex) state.ItemIDs[outputIndex] = itemID // Convert thoughtSignature to string @@ -965,16 +958,12 @@ func processGeminiFunctionCallPart(part *Part, state *GeminiResponsesStreamState var responses []*schemas.BifrostResponsesStreamResponse // Close text item if open - if state.HasStartedText && !state.TextItemClosed { - responses = append(responses, closeGeminiTextItem(state, sequenceNumber)...) + if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil { + responses = append(responses, closeResponses...) } // Start new function call item - outputIndex := state.CurrentOutputIndex + 1 - if !state.HasStartedText { - outputIndex = 1 // If no text, start at index 1 - } - state.CurrentOutputIndex = outputIndex + outputIndex := state.nextOutputIndex() toolUseID := part.FunctionCall.ID if toolUseID == "" { @@ -988,7 +977,7 @@ func processGeminiFunctionCallPart(part *Part, state *GeminiResponsesStreamState // Convert args to JSON string argsJSON := "" if part.FunctionCall.Args != nil { - if argsBytes, err := json.Marshal(part.FunctionCall.Args); err == nil { + if argsBytes, err := sonic.Marshal(part.FunctionCall.Args); err == nil { argsJSON = string(argsBytes) } } @@ -1038,19 +1027,15 @@ func processGeminiFunctionResponsePart(part *Part, state *GeminiResponsesStreamS var responses []*schemas.BifrostResponsesStreamResponse // Close text item if open - if state.HasStartedText && !state.TextItemClosed { - responses = append(responses, closeGeminiTextItem(state, sequenceNumber)...) + if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil { + responses = append(responses, closeResponses...) } // Extract output from function response output := extractFunctionResponseOutput(part.FunctionResponse) // Create new output item for the function response - outputIndex := state.CurrentOutputIndex + 1 - if !state.HasStartedText { - outputIndex = 0 - } - state.CurrentOutputIndex = outputIndex + outputIndex := state.nextOutputIndex() responseID := part.FunctionResponse.ID if responseID == "" { @@ -1108,8 +1093,8 @@ func processGeminiInlineDataPart(part *Part, state *GeminiResponsesStreamState, var responses []*schemas.BifrostResponsesStreamResponse // Close text item if open - if state.HasStartedText && !state.TextItemClosed { - responses = append(responses, closeGeminiTextItem(state, sequenceNumber)...) + if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil { + responses = append(responses, closeResponses...) 
} // Convert inline data to content block @@ -1119,13 +1104,8 @@ func processGeminiInlineDataPart(part *Part, state *GeminiResponsesStreamState, } // Create new output item for the inline data - outputIndex := state.CurrentOutputIndex + 1 - if !state.HasStartedText { - outputIndex = 0 - } - state.CurrentOutputIndex = outputIndex - - itemID := fmt.Sprintf("msg_%s_item_%d", *state.MessageID, outputIndex) + outputIndex := state.nextOutputIndex() + itemID := state.generateItemID("item", outputIndex) state.ItemIDs[outputIndex] = itemID // Emit output_item.added with the inline data content block @@ -1186,8 +1166,8 @@ func processGeminiFileDataPart(part *Part, state *GeminiResponsesStreamState, se var responses []*schemas.BifrostResponsesStreamResponse // Close text item if open - if state.HasStartedText && !state.TextItemClosed { - responses = append(responses, closeGeminiTextItem(state, sequenceNumber)...) + if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil { + responses = append(responses, closeResponses...) } // Convert file data to content block @@ -1197,13 +1177,8 @@ func processGeminiFileDataPart(part *Part, state *GeminiResponsesStreamState, se } // Create new output item for the file data - outputIndex := state.CurrentOutputIndex + 1 - if !state.HasStartedText { - outputIndex = 0 - } - state.CurrentOutputIndex = outputIndex - - itemID := fmt.Sprintf("msg_%s_item_%d", *state.MessageID, outputIndex) + outputIndex := state.nextOutputIndex() + itemID := state.generateItemID("item", outputIndex) state.ItemIDs[outputIndex] = itemID // Emit output_item.added with the file data content block @@ -1263,19 +1238,19 @@ func processGeminiFileDataPart(part *Part, state *GeminiResponsesStreamState, se func closeGeminiTextItem(state *GeminiResponsesStreamState, sequenceNumber int) []*schemas.BifrostResponsesStreamResponse { var responses []*schemas.BifrostResponsesStreamResponse - outputIndex := 0 + outputIndex := state.TextOutputIndex itemID := state.ItemIDs[outputIndex] contentIndex := 0 // Emit output_text.done - emptyText := "" + fullText := state.TextBuffer.String() responses = append(responses, &schemas.BifrostResponsesStreamResponse{ Type: schemas.ResponsesStreamResponseTypeOutputTextDone, SequenceNumber: sequenceNumber + len(responses), OutputIndex: &outputIndex, ContentIndex: &contentIndex, ItemID: &itemID, - Text: &emptyText, + Text: &fullText, }) // Emit content_part.done @@ -1316,8 +1291,8 @@ func closeGeminiOpenItems(state *GeminiResponsesStreamState, usage *GenerateCont var responses []*schemas.BifrostResponsesStreamResponse // Close text item if still open - if state.HasStartedText && !state.TextItemClosed { - responses = append(responses, closeGeminiTextItem(state, sequenceNumber)...) + if closeResponses := state.closeTextItemIfOpen(sequenceNumber); closeResponses != nil { + responses = append(responses, closeResponses...) 
} // Close any open tool calls @@ -1658,22 +1633,6 @@ func convertGeminiCandidatesToResponsesOutput(candidates []*Candidate) []schemas for _, part := range candidate.Content.Parts { // Handle different types of parts switch { - case part.Text != "": - // Regular text message - msg := schemas.ResponsesMessage{ - Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant), - Content: &schemas.ResponsesMessageContent{ - ContentBlocks: []schemas.ResponsesMessageContentBlock{ - { - Type: schemas.ResponsesOutputMessageContentTypeText, - Text: &part.Text, - }, - }, - }, - Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), - } - messages = append(messages, msg) - case part.Thought: // Thinking/reasoning message if part.Text != "" { @@ -1692,12 +1651,28 @@ func convertGeminiCandidatesToResponsesOutput(candidates []*Candidate) []schemas messages = append(messages, msg) } + case part.Text != "": + // Regular text message + msg := schemas.ResponsesMessage{ + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant), + Content: &schemas.ResponsesMessageContent{ + ContentBlocks: []schemas.ResponsesMessageContentBlock{ + { + Type: schemas.ResponsesOutputMessageContentTypeText, + Text: &part.Text, + }, + }, + }, + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + } + messages = append(messages, msg) + case part.FunctionCall != nil: // Function call message // Convert Args to JSON string if it's not already a string argumentsStr := "" if part.FunctionCall.Args != nil { - if argsBytes, err := json.Marshal(part.FunctionCall.Args); err == nil { + if argsBytes, err := sonic.Marshal(part.FunctionCall.Args); err == nil { argumentsStr = string(argsBytes) } } @@ -1723,7 +1698,7 @@ func convertGeminiCandidatesToResponsesOutput(candidates []*Candidate) []schemas // Preserve thought signature if present (required for Gemini 3 Pro) // Store it in a separate ResponsesReasoning message for better scalability if len(part.ThoughtSignature) > 0 { - thoughtSig := string(part.ThoughtSignature) + thoughtSig := base64.StdEncoding.EncodeToString(part.ThoughtSignature) reasoningMsg := schemas.ResponsesMessage{ Role: schemas.Ptr(schemas.ResponsesInputMessageRoleAssistant), Type: schemas.Ptr(schemas.ResponsesMessageTypeReasoning), @@ -1899,7 +1874,8 @@ func (r *GeminiGenerationRequest) convertParamsToGenerationConfigResponses(param config.ThinkingConfig = &GenerationConfigThinkingConfig{ IncludeThoughts: true, } - if params.Reasoning.Effort != nil { + // only set thinking level if max tokens is not set + if params.Reasoning.Effort != nil && params.Reasoning.MaxTokens == nil { switch *params.Reasoning.Effort { case "minimal", "low": config.ThinkingConfig.ThinkingLevel = ThinkingLevelLow @@ -2201,7 +2177,10 @@ func convertResponsesMessagesToGeminiContents(messages []schemas.ResponsesMessag nextMsg := messages[i+1] if nextMsg.Type != nil && *nextMsg.Type == schemas.ResponsesMessageTypeReasoning && nextMsg.ResponsesReasoning != nil && nextMsg.ResponsesReasoning.EncryptedContent != nil { - part.ThoughtSignature = []byte(*nextMsg.ResponsesReasoning.EncryptedContent) + decodedSig, err := base64.StdEncoding.DecodeString(*nextMsg.ResponsesReasoning.EncryptedContent) + if err == nil { + part.ThoughtSignature = decodedSig + } } } diff --git a/core/providers/gemini/types.go b/core/providers/gemini/types.go index a57b7fa92..aa7ae1fd0 100644 --- a/core/providers/gemini/types.go +++ b/core/providers/gemini/types.go @@ -8,6 +8,8 @@ import ( "reflect" "strings" "time" + + "github.com/bytedance/sonic" ) type Role string @@ -912,6 
+914,46 @@ type GenerationConfigThinkingConfig struct { ThinkingLevel ThinkingLevel `json:"thinkingLevel,omitempty"` } +// Gemini API supports Camel case but genai sdk sends thinking fields as snake_case +// UnmarshalJSON implements custom JSON unmarshaling to support both camelCase and snake_case +func (tc *GenerationConfigThinkingConfig) UnmarshalJSON(data []byte) error { + // Define an auxiliary struct with both camelCase and snake_case tags + type Alias struct { + IncludeThoughts *bool `json:"includeThoughts"` + IncludeThoughtsSnake *bool `json:"include_thoughts"` + ThinkingBudget *int32 `json:"thinkingBudget"` + ThinkingBudgetSnake *int32 `json:"thinking_budget"` + ThinkingLevel *ThinkingLevel `json:"thinkingLevel"` + ThinkingLevelSnake *ThinkingLevel `json:"thinking_level"` + } + + var aux Alias + if err := sonic.Unmarshal(data, &aux); err != nil { + return err + } + + // Prefer camelCase, fall back to snake_case + if aux.IncludeThoughts != nil { + tc.IncludeThoughts = *aux.IncludeThoughts + } else if aux.IncludeThoughtsSnake != nil { + tc.IncludeThoughts = *aux.IncludeThoughtsSnake + } + + if aux.ThinkingBudget != nil { + tc.ThinkingBudget = aux.ThinkingBudget + } else if aux.ThinkingBudgetSnake != nil { + tc.ThinkingBudget = aux.ThinkingBudgetSnake + } + + if aux.ThinkingLevel != nil { + tc.ThinkingLevel = *aux.ThinkingLevel + } else if aux.ThinkingLevelSnake != nil { + tc.ThinkingLevel = *aux.ThinkingLevelSnake + } + + return nil +} + type ThinkingLevel string const ( diff --git a/core/providers/gemini/utils.go b/core/providers/gemini/utils.go index 1ea918ee8..dfe66ff0d 100644 --- a/core/providers/gemini/utils.go +++ b/core/providers/gemini/utils.go @@ -32,6 +32,9 @@ func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters() } if config.ThinkingConfig != nil { params.Reasoning = &schemas.ResponsesParametersReasoning{} + if strings.Contains(r.Model, "openai") { + params.Reasoning.Summary = schemas.Ptr("auto") + } if config.ThinkingConfig.ThinkingBudget != nil { params.Reasoning.MaxTokens = schemas.Ptr(int(*config.ThinkingConfig.ThinkingBudget)) } @@ -352,8 +355,7 @@ func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseMod } if params.Reasoning.MaxTokens != nil { config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(*params.Reasoning.MaxTokens)) - } - if params.Reasoning.Effort != nil { + } else if params.Reasoning.Effort != nil { switch *params.Reasoning.Effort { case "minimal", "low": config.ThinkingConfig.ThinkingLevel = ThinkingLevelLow @@ -669,6 +671,11 @@ func convertBifrostMessagesToGemini(messages []schemas.ChatMessage) []Content { Parts: parts, Role: string(message.Role), } + if message.Role == schemas.ChatMessageRoleUser { + content.Role = "user" + } else { + content.Role = "model" + } contents = append(contents, content) } } diff --git a/core/providers/vertex/vertex.go b/core/providers/vertex/vertex.go index 8bde52de0..569feb49f 100644 --- a/core/providers/vertex/vertex.go +++ b/core/providers/vertex/vertex.go @@ -401,16 +401,16 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas. 
completeURL = fmt.Sprintf("https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/mistralai/models/%s:rawPredict", region, projectID, region, deployment) } } else if schemas.IsGeminiModel(deployment) { + // Gemini models support api key + if key.Value != "" { + authQuery = fmt.Sprintf("key=%s", url.QueryEscape(key.Value)) + } if region == "global" { completeURL = fmt.Sprintf("https://aiplatform.googleapis.com/v1/projects/%s/locations/global/publishers/google/models/%s:generateContent", projectID, deployment) } else { completeURL = fmt.Sprintf("https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:generateContent", region, projectID, region, deployment) } } else { - // Other models use OpenAPI endpoint for gemini models - if key.Value != "" { - authQuery = fmt.Sprintf("key=%s", url.QueryEscape(key.Value)) - } if region == "global" { completeURL = fmt.Sprintf("https://aiplatform.googleapis.com/v1beta1/projects/%s/locations/global/endpoints/openapi/chat/completions", projectID) } else { @@ -501,7 +501,7 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas. } else if schemas.IsGeminiModel(deployment) { geminiResponse := gemini.GenerateContentResponse{} - rawRequest, rawResponse, bifrostErr := providerUtils.HandleProviderResponse(resp.Body(), &geminiResponse, jsonBody, provider.sendBackRawRequest, provider.sendBackRawResponse) + rawRequest, rawResponse, bifrostErr := providerUtils.HandleProviderResponse(resp.Body(), &geminiResponse, jsonBody, providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse)) if bifrostErr != nil { return nil, bifrostErr } @@ -734,6 +734,7 @@ func (provider *VertexProvider) ChatCompletionStream(ctx context.Context, postHo jsonData, headers, provider.networkConfig.ExtraHeaders, + providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), provider.GetProviderKey(), request.Model, @@ -962,6 +963,11 @@ func (provider *VertexProvider) Responses(ctx context.Context, key schemas.Key, return nil, providerUtils.NewConfigurationError("region is not set in key config", providerName) } + authQuery := "" + if key.Value != "" { + authQuery = fmt.Sprintf("key=%s", url.QueryEscape(key.Value)) + } + var url string if region == "global" { url = fmt.Sprintf("https://aiplatform.googleapis.com/v1/projects/%s/locations/global/publishers/google/models/%s:generateContent", projectID, deployment) @@ -979,16 +985,22 @@ func (provider *VertexProvider) Responses(ctx context.Context, key schemas.Key, req.Header.SetContentType("application/json") providerUtils.SetExtraHeaders(ctx, req, provider.networkConfig.ExtraHeaders, nil) - // Getting oauth2 token - tokenSource, err := getAuthTokenSource(key) - if err != nil { - return nil, providerUtils.NewBifrostOperationError("error creating auth token source", err, schemas.Vertex) - } - token, err := tokenSource.Token() - if err != nil { - return nil, providerUtils.NewBifrostOperationError("error getting token", err, schemas.Vertex) + // If auth query is set, add it to the URL + // Otherwise, get the oauth2 token and set the Authorization header + if authQuery != "" { + url = fmt.Sprintf("%s?%s", url, authQuery) + } else { + // Getting oauth2 token + tokenSource, err := getAuthTokenSource(key) + if err != nil { + return nil, providerUtils.NewBifrostOperationError("error creating auth token 
source", err, schemas.Vertex) + } + token, err := tokenSource.Token() + if err != nil { + return nil, providerUtils.NewBifrostOperationError("error getting token", err, schemas.Vertex) + } + req.Header.Set("Authorization", "Bearer "+token.AccessToken) } - req.Header.Set("Authorization", "Bearer "+token.AccessToken) req.SetRequestURI(url) req.SetBody(jsonBody) @@ -1009,7 +1021,7 @@ func (provider *VertexProvider) Responses(ctx context.Context, key schemas.Key, geminiResponse := &gemini.GenerateContentResponse{} - rawRequest, rawResponse, bifrostErr := providerUtils.HandleProviderResponse(resp.Body(), geminiResponse, jsonBody, provider.sendBackRawRequest, provider.sendBackRawResponse) + rawRequest, rawResponse, bifrostErr := providerUtils.HandleProviderResponse(resp.Body(), geminiResponse, jsonBody, providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse)) if bifrostErr != nil { return nil, bifrostErr } @@ -1024,9 +1036,13 @@ func (provider *VertexProvider) Responses(ctx context.Context, key schemas.Key, response.ExtraFields.ModelDeployment = deployment } + // Set raw response if enabled if providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse) { response.ExtraFields.RawResponse = rawResponse } + if request.Model != deployment { + response.ExtraFields.ModelDeployment = deployment + } if providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest) { response.ExtraFields.RawRequest = rawRequest @@ -1217,6 +1233,7 @@ func (provider *VertexProvider) ResponsesStream(ctx context.Context, postHookRun jsonData, headers, provider.networkConfig.ExtraHeaders, + providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse), provider.GetProviderKey(), request.Model, diff --git a/core/version b/core/version index 84e311877..f480e7993 100644 --- a/core/version +++ b/core/version @@ -1 +1 @@ -1.2.36 \ No newline at end of file +1.2.37 \ No newline at end of file diff --git a/framework/changelog.md b/framework/changelog.md index 9d970f454..6f9bb2023 100644 --- a/framework/changelog.md +++ b/framework/changelog.md @@ -1,3 +1,4 @@ feat: support raw response accumulation in stream accumulator feat: support raw request configuration and logging -feat: added support for reasoning accumulation in stream accumulator \ No newline at end of file +feat: added support for reasoning accumulation in stream accumulator +chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/framework/version b/framework/version index e16ab4788..e20060401 100644 --- a/framework/version +++ b/framework/version @@ -1 +1 @@ -1.1.46 \ No newline at end of file +1.1.47 \ No newline at end of file diff --git a/plugins/governance/changelog.md b/plugins/governance/changelog.md index e69de29bb..b36cdb2ce 100644 --- a/plugins/governance/changelog.md +++ b/plugins/governance/changelog.md @@ -0,0 +1 @@ +- chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/plugins/governance/version b/plugins/governance/version index fea650a6e..bba60c093 100644 --- a/plugins/governance/version +++ b/plugins/governance/version @@ -1 +1 @@ -1.3.47 \ No newline at end of file +1.3.48 \ No newline at end of file diff --git a/plugins/jsonparser/changelog.md b/plugins/jsonparser/changelog.md index e69de29bb..b36cdb2ce 100644 --- a/plugins/jsonparser/changelog.md +++ 
b/plugins/jsonparser/changelog.md @@ -0,0 +1 @@ +- chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/plugins/jsonparser/version b/plugins/jsonparser/version index fea650a6e..bba60c093 100644 --- a/plugins/jsonparser/version +++ b/plugins/jsonparser/version @@ -1 +1 @@ -1.3.47 \ No newline at end of file +1.3.48 \ No newline at end of file diff --git a/plugins/logging/changelog.md b/plugins/logging/changelog.md index b43651aee..9df6479ce 100644 --- a/plugins/logging/changelog.md +++ b/plugins/logging/changelog.md @@ -1 +1,2 @@ -feat: support for raw request logging \ No newline at end of file +- feat: support for raw request logging +- chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/plugins/logging/version b/plugins/logging/version index fea650a6e..bba60c093 100644 --- a/plugins/logging/version +++ b/plugins/logging/version @@ -1 +1 @@ -1.3.47 \ No newline at end of file +1.3.48 \ No newline at end of file diff --git a/plugins/maxim/changelog.md b/plugins/maxim/changelog.md index e69de29bb..b36cdb2ce 100644 --- a/plugins/maxim/changelog.md +++ b/plugins/maxim/changelog.md @@ -0,0 +1 @@ +- chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/plugins/maxim/version b/plugins/maxim/version index cd41b2c0c..15539864f 100644 --- a/plugins/maxim/version +++ b/plugins/maxim/version @@ -1 +1 @@ -1.4.47 \ No newline at end of file +1.4.48 \ No newline at end of file diff --git a/plugins/mocker/changelog.md b/plugins/mocker/changelog.md index e69de29bb..b36cdb2ce 100644 --- a/plugins/mocker/changelog.md +++ b/plugins/mocker/changelog.md @@ -0,0 +1 @@ +- chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/plugins/mocker/version b/plugins/mocker/version index 78c1c17f4..fea650a6e 100644 --- a/plugins/mocker/version +++ b/plugins/mocker/version @@ -1 +1 @@ -1.3.46 \ No newline at end of file +1.3.47 \ No newline at end of file diff --git a/plugins/otel/changelog.md b/plugins/otel/changelog.md index e69de29bb..b36cdb2ce 100644 --- a/plugins/otel/changelog.md +++ b/plugins/otel/changelog.md @@ -0,0 +1 @@ +- chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/plugins/otel/version b/plugins/otel/version index 9e6b608e2..de0434d0b 100644 --- a/plugins/otel/version +++ b/plugins/otel/version @@ -1 +1 @@ -1.0.46 \ No newline at end of file +1.0.47 \ No newline at end of file diff --git a/plugins/semanticcache/changelog.md b/plugins/semanticcache/changelog.md index e69de29bb..b36cdb2ce 100644 --- a/plugins/semanticcache/changelog.md +++ b/plugins/semanticcache/changelog.md @@ -0,0 +1 @@ +- chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/plugins/semanticcache/version b/plugins/semanticcache/version index 78c1c17f4..fea650a6e 100644 --- a/plugins/semanticcache/version +++ b/plugins/semanticcache/version @@ -1 +1 @@ -1.3.46 \ No newline at end of file +1.3.47 \ No newline at end of file diff --git a/plugins/telemetry/changelog.md b/plugins/telemetry/changelog.md index e69de29bb..b36cdb2ce 100644 --- a/plugins/telemetry/changelog.md +++ b/plugins/telemetry/changelog.md @@ -0,0 +1 @@ +- chore: updating core to 1.2.37 and framework to 1.1.47 \ No newline at end of file diff --git a/plugins/telemetry/version b/plugins/telemetry/version index 78c1c17f4..fea650a6e 100644 --- a/plugins/telemetry/version +++ b/plugins/telemetry/version @@ -1 +1 @@ -1.3.46 
\ No newline at end of file +1.3.47 \ No newline at end of file diff --git a/tests/integrations/config.yml b/tests/integrations/config.yml index 043e7d723..80f6a54d3 100644 --- a/tests/integrations/config.yml +++ b/tests/integrations/config.yml @@ -41,6 +41,7 @@ providers: transcription: "whisper-1" embeddings: "text-embedding-3-small" streaming: "gpt-4o-mini" + thinking: "gpt-5.1" alternatives: - "gpt-4" - "gpt-4-turbo-preview" @@ -52,7 +53,7 @@ providers: vision: "claude-3-7-sonnet-20250219" tools: "claude-sonnet-4-5-20250929" streaming: "claude-sonnet-4-5-20250929" - thinking: "claude-sonnet-4-20250514" + thinking: "claude-opus-4-5" alternatives: - "claude-3-sonnet-20240229" - "claude-3-opus-20240229" @@ -63,6 +64,7 @@ providers: chat: "gemini-2.5-flash" vision: "gemini-2.5-flash" tools: "gemini-2.5-flash" + thinking: "gemini-3-pro-preview" speech: "gemini-2.5-flash-preview-tts" transcription: "gemini-2.5-flash" embeddings: "gemini-embedding-001" @@ -78,7 +80,7 @@ providers: vision: "anthropic.claude-3-5-sonnet-20240620-v1:0" tools: "anthropic.claude-3-5-sonnet-20240620-v1:0" streaming: "anthropic.claude-3-5-sonnet-20240620-v1:0" - thinking: "anthropic.claude-3-5-sonnet-20240620-v1:0" + thinking: "us.anthropic.claude-opus-4-5-20251101-v1:0" text_completion: "mistral.mistral-7b-instruct-v0:2" embeddings: "cohere.embed-v4:0" alternatives: @@ -120,7 +122,7 @@ provider_scenarios: transcription: true transcription_streaming: true embeddings: true - thinking: false + thinking: true list_models: true responses: true responses_image: true @@ -170,7 +172,7 @@ provider_scenarios: transcription: true transcription_streaming: true embeddings: true - thinking: false + thinking: true list_models: true responses: true responses_image: true diff --git a/tests/integrations/pyproject.toml b/tests/integrations/pyproject.toml index 7411c897d..c242c0f0e 100644 --- a/tests/integrations/pyproject.toml +++ b/tests/integrations/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "langchain-mistralai>=0.1.0", "langgraph>=0.1.0", "mistralai>=0.4.0", - "google-genai>=1.0.0", + "google-genai>=1.50.0", "pydantic-ai>=0.1.0", "boto3>=1.34.0", # Testing utilities diff --git a/tests/integrations/tests/test_anthropic.py b/tests/integrations/tests/test_anthropic.py index f7bc2651f..d3e4b9dc8 100644 --- a/tests/integrations/tests/test_anthropic.py +++ b/tests/integrations/tests/test_anthropic.py @@ -262,7 +262,6 @@ def test_04_multiple_tool_calls(self, anthropic_client, test_config, provider, m made_relevant_calls ), f"Expected tool calls from {expected_tools}, got {tool_names}" - @skip_if_no_api_key("anthropic") @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("end2end_tool_calling")) def test_05_end2end_tool_calling(self, anthropic_client, test_config, provider, model): if provider == "_no_providers_" or model == "_no_model_": @@ -275,7 +274,7 @@ def test_05_end2end_tool_calling(self, anthropic_client, test_config, provider, model=format_provider_model(provider, model), messages=messages, tools=tools, - max_tokens=100, + max_tokens=500, ) assert_has_tool_calls(response, expected_count=1) @@ -329,7 +328,6 @@ def test_05_end2end_tool_calling(self, anthropic_client, test_config, provider, # If no content, that's ok - tool result was sufficient print("Model returned empty content - tool result was sufficient") - @skip_if_no_api_key("anthropic") @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("automatic_function_calling")) def 
test_06_automatic_function_calling(self, anthropic_client, test_config, provider, model): if provider == "_no_providers_" or model == "_no_model_": @@ -445,7 +443,6 @@ def test_09_multiple_images(self, anthropic_client, test_config, provider, model word in content for word in COMPARISON_KEYWORDS ), f"Response should contain comparison keywords. Got content: {content}" - @skip_if_no_api_key("anthropic") def test_10_complex_end2end(self, anthropic_client, test_config): """Test Case 10: Complex end-to-end with conversation, images, and tools""" messages = [ @@ -531,7 +528,6 @@ def test_10_complex_end2end(self, anthropic_client, test_config): # If no content, that's ok too - tool result was sufficient print("Model returned empty content - tool result was sufficient") - @skip_if_no_api_key("anthropic") def test_11_integration_specific_features(self, anthropic_client, test_config): """Test Case 11: Anthropic-specific features""" @@ -575,7 +571,6 @@ def test_11_integration_specific_features(self, anthropic_client, test_config): # Should prefer calculator for math question assert tool_calls[0]["name"] == "calculate" - @skip_if_no_api_key("anthropic") def test_12_error_handling_invalid_roles(self, anthropic_client, test_config): """Test Case 12: Error handling for invalid roles""" # bifrost handles invalid roles internally so this test should not raise an exception @@ -599,7 +594,7 @@ def test_13_streaming(self, anthropic_client, test_config, provider, model): stream = anthropic_client.messages.create( model=format_provider_model(provider, model), messages=STREAMING_CHAT_MESSAGES, - max_tokens=200, + max_tokens=1000, stream=True, ) @@ -621,7 +616,7 @@ def test_13_streaming(self, anthropic_client, test_config, provider, model): stream_with_tools = anthropic_client.messages.create( model=format_provider_model(provider, tools_model), messages=STREAMING_TOOL_CALL_MESSAGES, - max_tokens=150, + max_tokens=1000, tools=convert_to_anthropic_tools([WEATHER_TOOL]), stream=True, ) @@ -634,7 +629,6 @@ def test_13_streaming(self, anthropic_client, test_config, provider, model): assert chunk_count_tools > 0, "Should receive at least one chunk with tools" assert tool_calls_detected_tools, "Should receive at least one chunk with tools" - @skip_if_no_api_key("anthropic") def test_14_list_models(self, anthropic_client, test_config): """Test Case 14: List models with pagination parameters""" # Test basic list with limit @@ -669,19 +663,24 @@ def test_14_list_models(self, anthropic_client, test_config): ) assert prev_response.data is not None assert len(prev_response.data) <= 2 - - @skip_if_no_api_key("anthropic") - def test_15_extended_thinking(self, anthropic_client, test_config): + + @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("thinking")) + def test_15_extended_thinking(self, anthropic_client, test_config, provider, model): + if provider == "_no_providers_" or model == "_no_model_": + pytest.skip("No providers configured for this scenario") """Test Case 15: Extended thinking/reasoning (non-streaming)""" # Convert to Anthropic message format messages = convert_to_anthropic_messages(ANTHROPIC_THINKING_PROMPT) response = anthropic_client.messages.create( - model=get_model("anthropic", "chat"), # Specific thinking-capable model - max_tokens=16000, + model=format_provider_model(provider, model), # Specific thinking-capable model + max_tokens=4000, # Reduced to prevent token limit errors for smaller context window models thinking={ "type": "enabled", - "budget_tokens": 10000, + 
"budget_tokens": 2500, # Reduced to prevent token limit errors + }, + extra_body={ + "reasoning_summary": "detailed" }, messages=messages, ) @@ -744,20 +743,22 @@ def test_15_extended_thinking(self, anthropic_client, test_config): print(f"✓ Thinking content ({len(thinking_content)} chars): {thinking_content[:150]}...") print(f"✓ Response content: {regular_content[:100]}...") - - @skip_if_no_api_key("anthropic") - def test_16_extended_thinking_streaming(self, anthropic_client, test_config): + + @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("thinking")) + def test_16_extended_thinking_streaming(self, anthropic_client, test_config, provider, model): + if provider == "_no_providers_" or model == "_no_model_": + pytest.skip("No providers configured for this scenario") """Test Case 16: Extended thinking/reasoning (streaming)""" # Convert to Anthropic message format messages = convert_to_anthropic_messages(ANTHROPIC_THINKING_STREAMING_PROMPT) # Stream with thinking enabled - use thinking-capable model stream = anthropic_client.messages.create( - model="anthropic/claude-sonnet-4-5", - max_tokens=16000, + model=format_provider_model(provider, model), + max_tokens=4000, # Reduced to prevent token limit errors for smaller context window models thinking={ "type": "enabled", - "budget_tokens": 10000, + "budget_tokens": 2000, # Reduced to prevent token limit errors }, messages=messages, stream=True, diff --git a/tests/integrations/tests/test_bedrock.py b/tests/integrations/tests/test_bedrock.py index 44eea9b43..1c70597d3 100644 --- a/tests/integrations/tests/test_bedrock.py +++ b/tests/integrations/tests/test_bedrock.py @@ -163,6 +163,7 @@ class TestBedrockIntegration: @skip_if_no_api_key("bedrock") def test_01_text_completion_invoke(self, bedrock_client, test_config): + pytest.skip("Skipping text completion invoke test") model_id = get_model("bedrock", "text_completion") request_body = { @@ -211,7 +212,7 @@ def test_02_converse_with_tool_calling(self, bedrock_client, test_config, provid modelId=model_id, messages=messages, toolConfig=tool_config, - inferenceConfig={"maxTokens": 100} + inferenceConfig={"maxTokens": 500} ) assert_has_tool_calls(response, expected_count=1) @@ -250,7 +251,7 @@ def test_02_converse_with_tool_calling(self, bedrock_client, test_config, provid modelId=model_id, messages=messages, toolConfig=tool_config, - inferenceConfig={"maxTokens": 150} + inferenceConfig={"maxTokens": 500} ) # Validate response structure @@ -296,7 +297,7 @@ def test_03_image_analysis(self, bedrock_client, test_config, provider, model): response = bedrock_client.converse( modelId=model_id, messages=messages, - inferenceConfig={"maxTokens": 200} + inferenceConfig={"maxTokens": 500} ) # First validate basic response structure @@ -685,7 +686,7 @@ def test_10_end2end_tool_calling(self, bedrock_client, test_config, provider, mo modelId=model_id, messages=messages, toolConfig=tool_config, - inferenceConfig={"maxTokens": 100} + inferenceConfig={"maxTokens": 500} ) assert_has_tool_calls(response, expected_count=1) @@ -726,7 +727,7 @@ def test_10_end2end_tool_calling(self, bedrock_client, test_config, provider, mo modelId=model_id, messages=messages, toolConfig=tool_config, - inferenceConfig={"maxTokens": 150} + inferenceConfig={"maxTokens": 500} ) # Validate final response diff --git a/tests/integrations/tests/test_google.py b/tests/integrations/tests/test_google.py index 4798e6510..374cb9de6 100644 --- a/tests/integrations/tests/test_google.py +++ 
b/tests/integrations/tests/test_google.py @@ -475,7 +475,7 @@ def test_11_integration_specific_features(self, google_client, test_config): response1 = google_client.models.generate_content( model=get_model("google", "chat"), contents="Tell me a creative story in one sentence.", - config=types.GenerateContentConfig(temperature=0.9, max_output_tokens=100), + config=types.GenerateContentConfig(temperature=0.9, max_output_tokens=1000), ) assert_valid_chat_response(response1) @@ -502,7 +502,7 @@ def test_11_integration_specific_features(self, google_client, test_config): contents="high", config=types.GenerateContentConfig( system_instruction="I say high, you say low", - max_output_tokens=10, + max_output_tokens=500, ), ) @@ -887,7 +887,8 @@ def test_25_speech_generation_language_support(self, google_client, test_config, assert_valid_speech_response(wav_audio, expected_audio_size_min=1000) @skip_if_no_api_key("google") - def test_26_extended_thinking(self, google_client, test_config): + @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("thinking")) + def test_26_extended_thinking(self, google_client, test_config, provider, model): """Test Case 26: Extended thinking/reasoning (non-streaming)""" from google.genai import types @@ -896,14 +897,14 @@ def test_26_extended_thinking(self, google_client, test_config): # Use a thinking-capable model (Gemini 2.0+ supports thinking) response = google_client.models.generate_content( - model=get_model("google", "chat"), + model=format_provider_model(provider, model), contents=messages, config=types.GenerateContentConfig( thinking_config=types.ThinkingConfig( include_thoughts=True, - thinking_budget=5000, + thinking_budget=2000, ), - max_output_tokens=800, + max_output_tokens=2500, ), ) @@ -917,22 +918,22 @@ def test_26_extended_thinking(self, google_client, test_config): assert hasattr(candidate.content, "parts"), "Content should have parts" # Check for thoughts in usage metadata - has_thoughts = False - thoughts_token_count = 0 - - if hasattr(response, "usage_metadata"): - usage = response.usage_metadata - if hasattr(usage, "thoughts_token_count"): - thoughts_token_count = usage.thoughts_token_count - has_thoughts = thoughts_token_count > 0 - print(f"Found thoughts with {thoughts_token_count} tokens") - - # Should have thinking/thoughts tokens - assert has_thoughts, ( - f"Response should contain thoughts/reasoning tokens. " - f"Usage metadata: {response.usage_metadata if hasattr(response, 'usage_metadata') else 'None'}" - ) - assert thoughts_token_count > 0, "Thoughts token count should be greater than 0" + if provider == "gemini": + has_thoughts = False + thoughts_token_count = 0 + + if hasattr(response, "usage_metadata"): + usage = response.usage_metadata + if hasattr(usage, "thoughts_token_count"): + thoughts_token_count = usage.thoughts_token_count + has_thoughts = thoughts_token_count > 0 + print(f"Found thoughts with {thoughts_token_count} tokens") + + # Should have thinking/thoughts tokens + assert has_thoughts, ( + f"Response should contain thoughts/reasoning tokens. 
" + f"Usage metadata: {response.usage_metadata if hasattr(response, 'usage_metadata') else 'None'}" + ) # Validate that we have a response (even if thoughts aren't directly visible in parts) # In Gemini, thoughts are counted but may not be directly exposed in the response @@ -944,7 +945,6 @@ def test_26_extended_thinking(self, google_client, test_config): # Should have regular response text assert len(regular_text) > 0, "Should have regular response text" - print(f"✓ Thoughts used {thoughts_token_count} tokens") print(f"✓ Response content: {regular_text[:200]}...") # Validate the response makes sense for the problem @@ -963,7 +963,8 @@ def test_26_extended_thinking(self, google_client, test_config): ) @skip_if_no_api_key("google") - def test_27_extended_thinking_streaming(self, google_client, test_config): + @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("thinking")) + def test_27_extended_thinking_streaming(self, google_client, test_config, provider, model): """Test Case 27: Extended thinking/reasoning (streaming)""" from google.genai import types @@ -972,14 +973,14 @@ def test_27_extended_thinking_streaming(self, google_client, test_config): # Stream with thinking enabled stream = google_client.models.generate_content_stream( - model=get_model("google", "chat"), + model=format_provider_model(provider, model), contents=messages, config=types.GenerateContentConfig( thinking_config=types.ThinkingConfig( include_thoughts=True, - thinking_budget=5000, + thinking_budget=2000, ), - max_output_tokens=800, + max_output_tokens=2500, ), ) @@ -1005,7 +1006,7 @@ def test_27_extended_thinking_streaming(self, google_client, test_config): # Safety check if chunk_count > 500: - break + raise AssertionError("Received >500 streaming chunks; possible non-terminating stream") # Combine collected content complete_text = "".join(text_parts) @@ -1015,21 +1016,19 @@ def test_27_extended_thinking_streaming(self, google_client, test_config): assert final_usage is not None, "Should have usage metadata" # Check for thoughts in usage metadata - has_thoughts = False - thoughts_token_count = 0 - - if hasattr(final_usage, "thoughts_token_count"): - thoughts_token_count = final_usage.thoughts_token_count - has_thoughts = thoughts_token_count > 0 - - assert has_thoughts, ( - f"Should detect thinking in streaming. " - f"Usage metadata: {final_usage}" - ) - assert thoughts_token_count > 0, ( - f"Should have substantial thinking tokens, got {thoughts_token_count}. " - f"Text parts: {len(text_parts)}" - ) + if provider == "gemini": + has_thoughts = False + thoughts_token_count = 0 + + if hasattr(final_usage, "thoughts_token_count"): + thoughts_token_count = final_usage.thoughts_token_count + has_thoughts = thoughts_token_count > 0 + print(f"Found thoughts with {thoughts_token_count} tokens") + + assert has_thoughts, ( + f"Response should contain thoughts/reasoning tokens. " + f"Usage metadata: {final_usage if hasattr(final_usage, 'thoughts_token_count') else 'None'}" + ) # Should have regular response text too assert len(complete_text) > 0, "Should have regular response text" @@ -1049,7 +1048,6 @@ def test_27_extended_thinking_streaming(self, google_client, test_config): f"Found {keyword_matches} keywords. Content: {complete_text[:200]}..." 
) - print(f"✓ Streamed with thinking ({thoughts_token_count} thought tokens)") print(f"✓ Streamed response ({len(text_parts)} chunks): {complete_text[:150]}...") diff --git a/tests/integrations/tests/test_openai.py b/tests/integrations/tests/test_openai.py index 7efcea40f..3744b234c 100644 --- a/tests/integrations/tests/test_openai.py +++ b/tests/integrations/tests/test_openai.py @@ -1131,7 +1131,7 @@ def test_32_responses_simple_text(self, openai_client, test_config, provider, mo response = openai_client.responses.create( model=format_provider_model(provider, model), input=RESPONSES_SIMPLE_TEXT_INPUT, - max_output_tokens=150, + max_output_tokens=1000, ) # Validate response structure @@ -1166,7 +1166,7 @@ def test_33_responses_with_system_message(self, openai_client, test_config, prov response = openai_client.responses.create( model=format_provider_model(provider, model), input=RESPONSES_TEXT_WITH_SYSTEM, - max_output_tokens=200, + max_output_tokens=1000, ) # Validate response structure @@ -1198,7 +1198,7 @@ def test_34_responses_with_image(self, openai_client, test_config, provider, mod response = openai_client.responses.create( model=format_provider_model(provider, model), input=RESPONSES_IMAGE_INPUT, - max_output_tokens=200, + max_output_tokens=1000, ) # Validate response structure @@ -1294,7 +1294,7 @@ def test_36_responses_streaming(self, openai_client, test_config, provider, mode stream = openai_client.responses.create( model=format_provider_model(provider, model), input=RESPONSES_STREAMING_INPUT, - max_output_tokens=150, + max_output_tokens=1000, stream=True, ) @@ -1383,10 +1383,11 @@ def test_37_responses_streaming_with_tools(self, openai_client, test_config, pro ) @skip_if_no_api_key("openai") - def test_38_responses_reasoning(self, openai_client, test_config): + @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("thinking")) + def test_38_responses_reasoning(self, openai_client, test_config, provider, model): """Test Case 38: Responses API with reasoning (gpt-5 model)""" # Use gpt-5 reasoning model - model_to_use = "openai/gpt-5" + model_to_use = format_provider_model(provider, model) try: response = openai_client.responses.create( diff --git a/tests/integrations/tests/utils/common.py b/tests/integrations/tests/utils/common.py index 85adcc360..8b826a633 100644 --- a/tests/integrations/tests/utils/common.py +++ b/tests/integrations/tests/utils/common.py @@ -561,10 +561,10 @@ def assert_valid_chat_response(response: Any, min_length: int = 1): if text_content: content = text_content[0].text elif hasattr(response, "choices") and len(response.choices) > 0: # OpenAI - # Handle OpenAI format + # Handle OpenAI format (content can be string or list) choice = response.choices[0] if hasattr(choice, "message") and hasattr(choice.message, "content"): - content = choice.message.content or "" + content = get_content_string(choice.message.content) elif isinstance(response, dict) and "output" in response: # Bedrock (boto3) # Handle Bedrock format output = response["output"] @@ -616,7 +616,7 @@ def assert_valid_image_response(response: Any): elif hasattr(response, "choices") and len(response.choices) > 0: # OpenAI choice = response.choices[0] if hasattr(choice, "message") and hasattr(choice.message, "content"): - content = (choice.message.content or "").lower() + content = get_content_string(choice.message.content).lower() elif isinstance(response, dict) and "output" in response: # Bedrock (boto3) output = response["output"] if "message" in output and "content" in 
output["message"]: diff --git a/tests/integrations/uv.lock b/tests/integrations/uv.lock index 43fd293d2..b5d61cc70 100644 --- a/tests/integrations/uv.lock +++ b/tests/integrations/uv.lock @@ -286,7 +286,7 @@ requires-dist = [ { name = "black", marker = "extra == 'dev'", specifier = ">=23.0.0" }, { name = "boto3", specifier = ">=1.34.0" }, { name = "flake8", marker = "extra == 'dev'", specifier = ">=6.0.0" }, - { name = "google-genai", specifier = ">=1.0.0" }, + { name = "google-genai", specifier = ">=1.50.0" }, { name = "httpx", specifier = ">=0.25.0" }, { name = "langchain", specifier = ">=1.1.0" }, { name = "langchain-anthropic", specifier = ">=0.1.0" }, diff --git a/transports/bifrost-http/integrations/anthropic.go b/transports/bifrost-http/integrations/anthropic.go index b7115027a..a765bb017 100644 --- a/transports/bifrost-http/integrations/anthropic.go +++ b/transports/bifrost-http/integrations/anthropic.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "log" "strconv" "strings" @@ -111,8 +110,6 @@ func createAnthropicMessagesRouteConfig(pathPrefix string) []RouteConfig { for _, event := range anthropicResponse { responseJSON, err := sonic.Marshal(event) if err != nil { - // Log JSON marshaling error but continue processing (should not happen) - log.Printf("Failed to marshal streaming response: %v", err) continue } combinedContent += fmt.Sprintf("event: %s\ndata: %s\n\n", event.Type, responseJSON) diff --git a/transports/changelog.md b/transports/changelog.md index e3c827ea0..cb12bc668 100644 --- a/transports/changelog.md +++ b/transports/changelog.md @@ -2,4 +2,6 @@ feat: support for raw response accumulation for streaming feat: support for raw request logging and sending back in response feat: added support for reasoning in chat completions feat: enhanced reasoning support in responses api -enhancement: improved internal inter provider conversions for integrations \ No newline at end of file +enhancement: improved internal inter provider conversions for integrations +feat: switched to gemini native api +feat: fallback to supported request type for custom models used in integration \ No newline at end of file