diff --git a/core/changelog.md b/core/changelog.md index 34031ea15..2be0dfb45 100644 --- a/core/changelog.md +++ b/core/changelog.md @@ -1,2 +1,3 @@ - feat: added support for multiple types in gemini and anthropic structured outputs properties -- fix: ensure request ID is consistently set in context before PreHooks are executed \ No newline at end of file +- fix: ensure request ID is consistently set in context before PreHooks are executed +- fix: correct conversion of thinking level to thinking budget and vice versa in gemini \ No newline at end of file diff --git a/core/providers/gemini/chat.go b/core/providers/gemini/chat.go index 9c477c4ff..9e310abf0 100644 --- a/core/providers/gemini/chat.go +++ b/core/providers/gemini/chat.go @@ -21,7 +21,7 @@ func ToGeminiChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) *Gemi // Convert parameters to generation config if bifrostReq.Params != nil { - geminiReq.GenerationConfig = convertParamsToGenerationConfig(bifrostReq.Params, []string{}) + geminiReq.GenerationConfig = convertParamsToGenerationConfig(bifrostReq.Params, []string{}, bifrostReq.Model) // Handle tool-related parameters if len(bifrostReq.Params.Tools) > 0 { diff --git a/core/providers/gemini/responses.go b/core/providers/gemini/responses.go index 187d939aa..eddc69bc2 100644 --- a/core/providers/gemini/responses.go +++ b/core/providers/gemini/responses.go @@ -8,6 +8,7 @@ import ( "time" "github.com/bytedance/sonic" + providerUtils "github.com/maximhq/bifrost/core/providers/utils" "github.com/maximhq/bifrost/core/schemas" ) @@ -2026,28 +2027,53 @@ func (r *GeminiGenerationRequest) convertParamsToGenerationConfigResponses(param config.ThinkingConfig = &GenerationConfigThinkingConfig{ IncludeThoughts: true, } - // only set thinking level if max tokens is not set - if params.Reasoning.Effort != nil && params.Reasoning.MaxTokens == nil { - switch *params.Reasoning.Effort { - case "none": - // turn off thinking - config.ThinkingConfig.IncludeThoughts = 
false - config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0)) - case "minimal", "low": - config.ThinkingConfig.ThinkingLevel = ThinkingLevelLow - case "medium", "high": - config.ThinkingConfig.ThinkingLevel = ThinkingLevelHigh - } + + // Get max tokens for conversions + maxTokens := DefaultCompletionMaxTokens + if config.MaxOutputTokens > 0 { + maxTokens = int(config.MaxOutputTokens) } - if params.Reasoning.MaxTokens != nil { - switch *params.Reasoning.MaxTokens { - case 0: // turn off thinking + minBudget := DefaultReasoningMinBudget + + hasMaxTokens := params.Reasoning.MaxTokens != nil + hasEffort := params.Reasoning.Effort != nil + supportsLevel := isGemini3Plus(r.Model) // Check if model is 3.0+ + + // PRIORITY RULE: If both max_tokens and effort are present, use ONLY max_tokens (budget) + // This ensures we send only thinkingBudget to Gemini, not thinkingLevel + + // Handle "none" effort explicitly (only if max_tokens not present) + if !hasMaxTokens && hasEffort && *params.Reasoning.Effort == "none" { + config.ThinkingConfig.IncludeThoughts = false + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0)) + } else if hasMaxTokens { + // User provided max_tokens - use thinkingBudget (all Gemini models support this) + // If both max_tokens and effort are present, we ignore effort and use ONLY max_tokens + budget := *params.Reasoning.MaxTokens + switch budget { + case 0: config.ThinkingConfig.IncludeThoughts = false config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0)) - case -1: // dynamic thinking budget - config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(-1)) - default: // constrained thinking budget - config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(*params.Reasoning.MaxTokens)) + case DynamicReasoningBudget: // Special case: -1 means dynamic budget + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(DynamicReasoningBudget)) + default: + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budget)) + } + } else if 
hasEffort { + // User provided effort only (no max_tokens) + if supportsLevel { + // Gemini 3.0+ - use thinkingLevel (more native) + config.ThinkingConfig.ThinkingLevel = schemas.Ptr(effortToThinkingLevel(*params.Reasoning.Effort, r.Model)) + } else { + // Gemini < 3.0 - must convert effort to budget + budgetTokens, err := providerUtils.GetBudgetTokensFromReasoningEffort( + *params.Reasoning.Effort, + minBudget, + maxTokens, + ) + if err == nil { + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budgetTokens)) + } } } } diff --git a/core/providers/gemini/types.go b/core/providers/gemini/types.go index 534a63449..f0c5ae953 100644 --- a/core/providers/gemini/types.go +++ b/core/providers/gemini/types.go @@ -14,6 +14,11 @@ import ( "github.com/maximhq/bifrost/core/schemas" ) +const MinReasoningMaxTokens = 1 // Minimum max tokens for reasoning - used for estimation of effort level +const DefaultCompletionMaxTokens = 8192 // Default max output tokens for Gemini - used for relative reasoning max token calculation +const DefaultReasoningMinBudget = 1024 // Default minimum reasoning budget for Gemini +const DynamicReasoningBudget = -1 // Special value for dynamic reasoning budget in Gemini + type Role string const ( @@ -914,7 +919,7 @@ type GenerationConfigThinkingConfig struct { ThinkingBudget *int32 `json:"thinkingBudget,omitempty"` // Optional. Indicates the thinking level. 
- ThinkingLevel ThinkingLevel `json:"thinkingLevel,omitempty"` + ThinkingLevel *string `json:"thinkingLevel,omitempty"` } // Gemini API supports Camel case but genai sdk sends thinking fields as snake_case @@ -922,12 +927,12 @@ type GenerationConfigThinkingConfig struct { func (tc *GenerationConfigThinkingConfig) UnmarshalJSON(data []byte) error { // Define an auxiliary struct with both camelCase and snake_case tags type Alias struct { - IncludeThoughts *bool `json:"includeThoughts"` - IncludeThoughtsSnake *bool `json:"include_thoughts"` - ThinkingBudget *int32 `json:"thinkingBudget"` - ThinkingBudgetSnake *int32 `json:"thinking_budget"` - ThinkingLevel *ThinkingLevel `json:"thinkingLevel"` - ThinkingLevelSnake *ThinkingLevel `json:"thinking_level"` + IncludeThoughts *bool `json:"includeThoughts"` + IncludeThoughtsSnake *bool `json:"include_thoughts"` + ThinkingBudget *int32 `json:"thinkingBudget"` + ThinkingBudgetSnake *int32 `json:"thinking_budget"` + ThinkingLevel *string `json:"thinkingLevel"` + ThinkingLevelSnake *string `json:"thinking_level"` } var aux Alias @@ -949,22 +954,14 @@ func (tc *GenerationConfigThinkingConfig) UnmarshalJSON(data []byte) error { } if aux.ThinkingLevel != nil { - tc.ThinkingLevel = *aux.ThinkingLevel + tc.ThinkingLevel = aux.ThinkingLevel } else if aux.ThinkingLevelSnake != nil { - tc.ThinkingLevel = *aux.ThinkingLevelSnake + tc.ThinkingLevel = aux.ThinkingLevelSnake } return nil } -type ThinkingLevel string - -const ( - ThinkingLevelUnspecified ThinkingLevel = "THINKING_LEVEL_UNSPECIFIED" - ThinkingLevelLow ThinkingLevel = "LOW" - ThinkingLevelHigh ThinkingLevel = "HIGH" -) - type GeminiBatchEmbeddingRequest struct { Requests []GeminiEmbeddingRequest `json:"requests,omitempty"` } diff --git a/core/providers/gemini/utils.go b/core/providers/gemini/utils.go index c303e90e2..55fca2f47 100644 --- a/core/providers/gemini/utils.go +++ b/core/providers/gemini/utils.go @@ -6,9 +6,64 @@ import ( "strings" "github.com/bytedance/sonic" + 
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
+// isGemini3Plus returns true if the model is Gemini 3.0 or higher
+// Uses simple string operations for hot path performance
+func isGemini3Plus(model string) bool {
+	// Convert to lowercase for case-insensitive comparison
+	model = strings.ToLower(model)
+
+	// Find "gemini-" prefix
+	idx := strings.Index(model, "gemini-")
+	if idx == -1 {
+		return false
+	}
+
+	// Get the part after "gemini-"
+	afterPrefix := model[idx+7:] // len("gemini-") = 7
+	if len(afterPrefix) == 0 {
+		return false
+	}
+
+	// The first character must be a DIGIT in '3'..'9' to count as 3.0+.
+	// Restricting to digits avoids misclassifying non-versioned model names
+	// such as "gemini-exp-1206" or "gemini-flash-latest": any ASCII letter
+	// compares greater than '3', so a bare ">= '3'" check would wrongly
+	// treat those models as Gemini 3.0+ and send thinkingLevel to models
+	// that only accept thinkingBudget.
+	firstChar := afterPrefix[0]
+	return firstChar >= '3' && firstChar <= '9'
+}
+
+// effortToThinkingLevel converts reasoning effort to Gemini ThinkingLevel string
+// Pro models only support "low" or "high"
+// Other models support "minimal", "low", "medium", and "high"
+func effortToThinkingLevel(effort string, model string) string {
+	isPro := strings.Contains(strings.ToLower(model), "pro")
+
+	switch effort {
+	case "none":
+		return "" // Empty string for no thinking
+	case "minimal":
+		if isPro {
+			return "low" // Pro models don't support minimal, use low
+		}
+		return "minimal"
+	case "low":
+		return "low"
+	case "medium":
+		if isPro {
+			return "high" // Pro models don't support medium, use high
+		}
+		return "medium"
+	case "high":
+		return "high"
+	default:
+		if isPro {
+			return "high"
+		}
+		return "medium"
+	}
+}
+
 func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters() *schemas.ResponsesParameters {
 	params := &schemas.ResponsesParameters{
 		ExtraParams: make(map[string]interface{}),
@@ -36,23 +91,56 @@ func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters()
 	if strings.Contains(r.Model, "openai") {
 		params.Reasoning.Summary = schemas.Ptr("auto")
 	}
-	if config.ThinkingConfig.ThinkingLevel != ThinkingLevelUnspecified {
-		switch config.ThinkingConfig.ThinkingLevel {
-		case 
ThinkingLevelLow: - params.Reasoning.Effort = schemas.Ptr("low") - case ThinkingLevelHigh: - params.Reasoning.Effort = schemas.Ptr("high") - } + + // Determine max tokens for conversions + maxTokens := DefaultCompletionMaxTokens + if config.MaxOutputTokens > 0 { + maxTokens = int(config.MaxOutputTokens) } + minBudget := DefaultReasoningMinBudget + + // Priority: Budget first (if present), then Level if config.ThinkingConfig.ThinkingBudget != nil { - params.Reasoning.MaxTokens = schemas.Ptr(int(*config.ThinkingConfig.ThinkingBudget)) - switch *config.ThinkingConfig.ThinkingBudget { + // Budget is set - use it directly + budget := int(*config.ThinkingConfig.ThinkingBudget) + params.Reasoning.MaxTokens = schemas.Ptr(budget) + + // Also provide effort for compatibility + effort := providerUtils.GetReasoningEffortFromBudgetTokens(budget, minBudget, maxTokens) + params.Reasoning.Effort = schemas.Ptr(effort) + + // Handle special cases + switch budget { case 0: params.Reasoning.Effort = schemas.Ptr("none") - case -1: - // dynamic thinking budget - params.Reasoning.Effort = schemas.Ptr("medium") - params.Reasoning.MaxTokens = schemas.Ptr(-1) + case DynamicReasoningBudget: + params.Reasoning.Effort = schemas.Ptr("medium") // dynamic + } + } else if config.ThinkingConfig.ThinkingLevel != nil && *config.ThinkingConfig.ThinkingLevel != "" { + // Level is set (only on 3.0+) - convert to effort and budget + level := *config.ThinkingConfig.ThinkingLevel + var effort string + + // Map Gemini thinking level to Bifrost effort + switch level { + case "minimal": + effort = "minimal" + case "low": + effort = "low" + case "medium": + effort = "medium" + case "high": + effort = "high" + default: + effort = "medium" + } + + params.Reasoning.Effort = schemas.Ptr(effort) + + // Also convert to budget for compatibility + if effort != "none" { + budget, _ := providerUtils.GetBudgetTokensFromReasoningEffort(effort, minBudget, maxTokens) + params.Reasoning.MaxTokens = schemas.Ptr(budget) } } } 
@@ -357,7 +445,7 @@ func convertGeminiUsageMetadataToResponsesUsage(metadata *GenerateContentRespons } // convertParamsToGenerationConfig converts Bifrost parameters to Gemini GenerationConfig -func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseModalities []string) GenerationConfig { +func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseModalities []string, model string) GenerationConfig { config := GenerationConfig{} // Add response modalities if specified @@ -396,14 +484,54 @@ func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseMod config.ThinkingConfig = &GenerationConfigThinkingConfig{ IncludeThoughts: true, } - if params.Reasoning.MaxTokens != nil { - config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(*params.Reasoning.MaxTokens)) - } else if params.Reasoning.Effort != nil { - switch *params.Reasoning.Effort { - case "minimal", "low": - config.ThinkingConfig.ThinkingLevel = ThinkingLevelLow - case "medium", "high": - config.ThinkingConfig.ThinkingLevel = ThinkingLevelHigh + + // Get max tokens for conversions + maxTokens := DefaultCompletionMaxTokens + if config.MaxOutputTokens > 0 { + maxTokens = int(config.MaxOutputTokens) + } + minBudget := DefaultReasoningMinBudget + + hasMaxTokens := params.Reasoning.MaxTokens != nil + hasEffort := params.Reasoning.Effort != nil + supportsLevel := isGemini3Plus(model) // Check if model is 3.0+ + + // PRIORITY RULE: If both max_tokens and effort are present, use ONLY max_tokens (budget) + // This ensures we send only thinkingBudget to Gemini, not thinkingLevel + + // Handle "none" effort explicitly (only if max_tokens not present) + if !hasMaxTokens && hasEffort && *params.Reasoning.Effort == "none" { + config.ThinkingConfig.IncludeThoughts = false + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0)) + } else if hasMaxTokens { + // User provided max_tokens - use thinkingBudget (all Gemini models support this) + // If both max_tokens 
and effort are present, we ignore effort and use ONLY max_tokens + budget := *params.Reasoning.MaxTokens + switch budget { + case 0: + config.ThinkingConfig.IncludeThoughts = false + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0)) + case DynamicReasoningBudget: // Special case: -1 means dynamic budget + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(DynamicReasoningBudget)) + default: + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budget)) + } + } else if hasEffort { + // User provided effort only (no max_tokens) + if supportsLevel { + // Gemini 3.0+ - use thinkingLevel (more native) + level := effortToThinkingLevel(*params.Reasoning.Effort, model) + config.ThinkingConfig.ThinkingLevel = &level + } else { + // Gemini < 3.0 - must convert effort to budget + budgetTokens, err := providerUtils.GetBudgetTokensFromReasoningEffort( + *params.Reasoning.Effort, + minBudget, + maxTokens, + ) + if err == nil { + config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budgetTokens)) + } } } } diff --git a/docs/providers/reasoning.mdx b/docs/providers/reasoning.mdx index b7f35853e..76df40fff 100644 --- a/docs/providers/reasoning.mdx +++ b/docs/providers/reasoning.mdx @@ -21,7 +21,8 @@ Bifrost normalizes all provider-specific reasoning formats to a consistent OpenA | OpenAI | `reasoning` | `reasoning_details` | None | `minimal`, `low`, `medium`, `high` | ✅ | | Anthropic | `thinking` | Content blocks | **1024 tokens** | `enabled` only | ✅ | | Bedrock (Anthropic) | `thinking` | Content blocks | **1024 tokens** | `enabled` only | ✅ | -| Gemini | `thinking_config` | `thought` parts | None | `off`, `low`, `medium`, `high` | ✅ | +| Gemini 2.5+ | `thinking_config` | `thought` parts | 1024 | Budget-only | ✅ | +| Gemini 3.0+ | `thinking_config` | `thought` parts | 1024 | `minimal`, `low`, `medium`, `high` + Budget | ✅ | --- @@ -534,24 +535,49 @@ chatReq := &schemas.BifrostChatRequest{ ### Gemini -Gemini uses `thinking_config` with effort-based 
configuration. +Gemini uses `thinking_config` with dual support for both token budgets and effort levels, depending on the model version. + +#### Model Version Support + +| Gemini Version | `thinkingBudget` | `thinkingLevel` | Notes | +|----------------|------------------|-----------------|-------| +| **2.5+** | ✅ | ❌ | Budget-only models | +| **3.0+** | ✅ | ✅ | Support both budget and level | + + +**Important**: Only ONE parameter (`thinkingBudget` or `thinkingLevel`) should be sent to Gemini at a time. When both `reasoning.max_tokens` and `reasoning.effort` are provided in a Bifrost request, `max_tokens` takes priority and is converted to `thinkingBudget`. + + +#### Priority Rules + +When both `reasoning.max_tokens` and `reasoning.effort` are present: + +``` +1. If max_tokens is provided → USE thinkingBudget (ignores effort) +2. Else if effort is provided: + - Gemini 3.0+ → USE thinkingLevel (more native) + - Gemini 2.5 → CONVERT effort to thinkingBudget +3. Else → disable reasoning +``` - + ```json -// Bifrost Request +// Bifrost Request - Both fields provided { + "model": "gemini-3.0-flash", "reasoning": { - "effort": "high", - "max_tokens": 4096 + "effort": "high", // Ignored + "max_tokens": 4096 // Takes priority } } -// Gemini Request +// Gemini 3.0+ Request - Only budget sent { "generation_config": { "thinking_config": { + "include_thoughts": true, "thinking_budget": 4096 } } @@ -559,10 +585,142 @@ Gemini uses `thinking_config` with effort-based configuration. 
``` - + + +```json +// Bifrost Request - Effort only +{ + "model": "gemini-3.0-flash", + "reasoning": { + "effort": "high" + } +} + +// Gemini 3.0+ Request - Converted to level +{ + "generation_config": { + "thinking_config": { + "include_thoughts": true, + "thinking_level": "high" + } + } +} +``` + + + + +```json +// Bifrost Request - Effort only +{ + "model": "gemini-2.5-flash", + "max_completion_tokens": 4096, + "reasoning": { + "effort": "high" + } +} + +// Gemini 2.5 Request - Converted to budget +// Calculation: 1024 + (0.80 × (4096 - 1024)) = 3482 +{ + "generation_config": { + "thinking_config": { + "include_thoughts": true, + "thinking_budget": 3482 + } + } +} +``` + + + + +#### Model-Specific Level Conversions + +Gemini Pro models have stricter constraints on thinking levels: + +| Bifrost Effort | Non-Pro Models | Pro Models | Notes | +|----------------|----------------|------------|-------| +| `"none"` | Empty string | Empty string | Disables thinking | +| `"minimal"` | `"minimal"` | `"low"` | Pro doesn't support minimal | +| `"low"` | `"low"` | `"low"` | Supported on all | +| `"medium"` | `"medium"` | `"high"` | Pro doesn't support medium | +| `"high"` | `"high"` | `"high"` | Supported on all | + +**Example**: +```go +// For "gemini-3.0-flash-thinking-exp" (non-Pro) +effort: "medium" → thinkingLevel: "medium" + +// For "gemini-3.0-pro" (Pro model) +effort: "medium" → thinkingLevel: "high" // Converted up +``` + +#### Special Values + +| Value | Field | Behavior | Use Case | +|-------|-------|----------|----------| +| `0` | `max_tokens` | `thinking_budget: 0`, `include_thoughts: false` | Explicitly disable reasoning | +| `-1` | `max_tokens` | `thinking_budget: -1` | **Dynamic budget** (Gemini decides) | +| `"none"` | `effort` | `thinking_budget: 0`, `include_thoughts: false` | Disable reasoning | + + + + +```json +// Bifrost Request - Dynamic budget +{ + "reasoning": { + "max_tokens": -1 + } +} + +// Gemini Request - Sent as-is +{ + "generation_config": { 
+ "thinking_config": { + "include_thoughts": true, + "thinking_budget": -1 + } + } +} +``` + + + + +```json +// Bifrost Request - Method 1 +{ + "reasoning": { + "max_tokens": 0 + } +} + +// Bifrost Request - Method 2 +{ + "reasoning": { + "effort": "none" + } +} + +// Gemini Request - Both become +{ + "generation_config": { + "thinking_config": { + "include_thoughts": false, + "thinking_budget": 0 + } + } +} +``` + + + ```go // Using Bifrost Go SDK with Gemini +// Example 1: Dynamic budget chatReq := &schemas.BifrostChatRequest{ Provider: schemas.Gemini, Model: "gemini-2.0-flash-thinking-exp-1219", @@ -570,21 +728,45 @@ chatReq := &schemas.BifrostChatRequest{ Params: &schemas.ChatParameters{ MaxCompletionTokens: schemas.Ptr(4096), Reasoning: &schemas.ChatReasoning{ - MaxTokens: schemas.Ptr(4096), // Gemini native field + MaxTokens: schemas.Ptr(-1), // Let Gemini decide }, }, } -// Bifrost converts to Gemini format: -// generation_config: { -// thinking_config: { -// thinking_budget: 4096 -// } -// } +// Example 2: Effort-based for Gemini 3.0+ +chatReq := &schemas.BifrostChatRequest{ + Provider: schemas.Gemini, + Model: "gemini-3.0-flash", + Input: messages, + Params: &schemas.ChatParameters{ + MaxCompletionTokens: schemas.Ptr(4096), + Reasoning: &schemas.ChatReasoning{ + Effort: schemas.Ptr("high"), // Converts to thinkingLevel + }, + }, +} + +// Example 3: Budget-based (all versions) +chatReq := &schemas.BifrostChatRequest{ + Provider: schemas.Gemini, + Model: "gemini-2.5-flash", + Input: messages, + Params: &schemas.ChatParameters{ + MaxCompletionTokens: schemas.Ptr(4096), + Reasoning: &schemas.ChatReasoning{ + MaxTokens: schemas.Ptr(3000), // Direct budget + }, + }, +} ``` - + + +#### Response Conversion + + + ```json // Gemini Response @@ -609,6 +791,7 @@ chatReq := &schemas.BifrostChatRequest{ "choices": [{ "message": { "content": "The answer is 42.", + "reasoning": "Analyzing the problem...", "reasoning_details": [{ "index": 0, "type": "text", @@ -620,7 
+803,7 @@ chatReq := &schemas.BifrostChatRequest{ ``` - + ```go // After calling Bifrost Chat Completions with Gemini @@ -633,7 +816,10 @@ if err != nil { choice := resp.Choices[0] message := choice.Message -// Access reasoning blocks +// Access combined reasoning text +fmt.Printf("Reasoning: %s\n", message.Reasoning) + +// Access detailed reasoning blocks for i, details := range message.ReasoningDetails { if details.Type == "text" { fmt.Printf("Thinking block %d:\n%s\n", i, details.Text) @@ -647,16 +833,34 @@ fmt.Printf("Answer:\n%s\n", message.Content) -**Effort Level Mapping**: +#### Conversion Summary + +**Bifrost → Gemini (Request)**: -| Bifrost Effort | Gemini Mode | -|----------------|-------------| -| Not set | `off` | -| `low` | Uses budget | -| `medium` | Uses budget | -| `high` | Uses budget | +| Input | Gemini 2.5 | Gemini 3.0+ | Note | +|-------|------------|-------------|------| +| `max_tokens: 4096` | `thinking_budget: 4096` | `thinking_budget: 4096` | Direct pass-through | +| `max_tokens: -1` | `thinking_budget: -1` | `thinking_budget: -1` | Dynamic budget | +| `max_tokens: 0` | `thinking_budget: 0` | `thinking_budget: 0` | Disabled | +| `effort: "high"` only | `thinking_budget: 3482`* | `thinking_level: "high"` | Estimated or native | +| `effort: "medium"` only | `thinking_budget: 2330`* | `thinking_level: "medium"` or `"high"`** | Estimated or native | +| Both `effort` + `max_tokens` | Uses `max_tokens` | Uses `max_tokens` | Priority rule | -**Code Reference**: `core/providers/gemini/chat.go` +\* Assumes `max_completion_tokens: 8192` (default), uses estimation formula +\*\* Pro models convert `"medium"` to `"high"` + +**Gemini → Bifrost (Response)**: + +| Gemini Field | Bifrost Field | Conversion | +|--------------|---------------|------------| +| `thinking_budget` | `reasoning.max_tokens` | Direct mapping | +| `thinking_level` | `reasoning.effort` | Level → effort mapping | +| `thought: true` parts | `reasoning_details[]` | Array of reasoning 
blocks | + +**Code References**: +- `core/providers/gemini/utils.go` (Chat Completions) +- `core/providers/gemini/responses.go` (Responses API) +- `core/providers/gemini/types.go` (Constants) --- @@ -879,7 +1083,7 @@ Different providers have different constraints on reasoning budget: | Bedrock Anthropic | `core/providers/bedrock/types.go` | **1024** | Same as Anthropic | | Bedrock Nova | `core/providers/bedrock/types.go` | 1 | More flexible | | Cohere | `core/providers/cohere/types.go` | 1 | Flexible | -| Gemini | `core/providers/gemini/types.go` | 1 | Flexible | +| Gemini | `core/providers/gemini/types.go` | 1024 | Default minimum for conversions | ### Default Completion Tokens (for ratio calculation) @@ -887,7 +1091,8 @@ When `max_completion_tokens` is not provided, these defaults are used for ratio | Provider | Default | File | |----------|---------|------| -| All providers | 4096 | `core/providers/*/types.go` | +| OpenAI, Anthropic, Cohere, Bedrock | 4096 | `core/providers/*/types.go` | +| Gemini | 8192 | `core/providers/gemini/types.go` | --- @@ -1256,6 +1461,27 @@ data: {"type": "content_block_stop"} **Impact**: Cannot disable thinking once reasoning param is present + +**Severity**: Medium +**Behavior**: When both `effort` and `max_tokens` are provided, only `thinkingBudget` is sent to Gemini (effort is dropped) +**Impact**: Effort value is completely ignored when max_tokens is present +**Workaround**: Provide only the parameter you want to use + + + +**Severity**: Medium +**Behavior**: Gemini 2.5 only supports `thinkingBudget`, while 3.0+ supports both `thinkingBudget` and `thinkingLevel` +**Impact**: Effort-only requests on 2.5 are converted to budget; on 3.0+ they use native levels +**Note**: Bifrost automatically detects version and uses appropriate conversion + + + +**Severity**: Low +**Behavior**: Pro models only support "low" and "high" thinking levels +**Impact**: `"minimal"` → `"low"`, `"medium"` → `"high"` for Pro models +**Note**: Non-Pro models 
support all four levels: minimal, low, medium, high + + --- ## Complete Provider Comparison @@ -1268,7 +1494,8 @@ data: {"type": "content_block_stop"} | Anthropic | Thinking blocks | Token budget | **1024** | ✅ | | Bedrock (Anthropic) | Reasoning config | Token budget | **1024** | ✅ | | Bedrock (Nova) | Reasoning config | Effort-based | None | ❌ | -| Gemini | Thinking config | Token-based | None | ✅ | +| Gemini 2.5+ | Thinking config | Token budget | 1024 | ✅ | +| Gemini 3.0+ | Thinking config | Dual (budget + level) | 1024 | ✅ | ### Parameter Support @@ -1278,7 +1505,8 @@ data: {"type": "content_block_stop"} | Anthropic | ❌ (binary) | ✅ | ✅ | ✅ | | Bedrock (Anthropic) | ❌ (binary) | ✅ | ✅ | ✅ | | Bedrock (Nova) | ✅ (3 levels) | ⚠️ (ignored) | ❌ | ✅ | -| Gemini | ✅ (implicit) | ✅ | ❌ | ✅ | +| Gemini 2.5+ | ⚠️ (converts to budget) | ✅ | ❌ | ✅ | +| Gemini 3.0+ | ✅ (4 levels) | ✅ | ❌ | ✅ | --- diff --git a/transports/changelog.md b/transports/changelog.md index eec331ec5..ea2fc1984 100644 --- a/transports/changelog.md +++ b/transports/changelog.md @@ -1,4 +1,5 @@ - feat: added support for multiple types in gemini and anthropic structured outputs properties - fix: added missing logs filter checks in ui for live updates - fix: ensure request ID is consistently set in context before PreHooks are executed -- docs: updated docs for xai provider \ No newline at end of file +- docs: updated docs for xai provider +- fix: correct conversion of thinking level to thinking budget and vice versa in gemini \ No newline at end of file