diff --git a/core/changelog.md b/core/changelog.md
index 34031ea15..2be0dfb45 100644
--- a/core/changelog.md
+++ b/core/changelog.md
@@ -1,2 +1,3 @@
- feat: added support for multiple types in gemini and anthropic structured outputs properties
-- fix: ensure request ID is consistently set in context before PreHooks are executed
\ No newline at end of file
+- fix: ensure request ID is consistently set in context before PreHooks are executed
+- fix: correct conversion of thinking level to thinking budget and vice versa in gemini
\ No newline at end of file
diff --git a/core/providers/gemini/chat.go b/core/providers/gemini/chat.go
index 9c477c4ff..9e310abf0 100644
--- a/core/providers/gemini/chat.go
+++ b/core/providers/gemini/chat.go
@@ -21,7 +21,7 @@ func ToGeminiChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) *Gemi
// Convert parameters to generation config
if bifrostReq.Params != nil {
- geminiReq.GenerationConfig = convertParamsToGenerationConfig(bifrostReq.Params, []string{})
+ geminiReq.GenerationConfig = convertParamsToGenerationConfig(bifrostReq.Params, []string{}, bifrostReq.Model)
// Handle tool-related parameters
if len(bifrostReq.Params.Tools) > 0 {
diff --git a/core/providers/gemini/responses.go b/core/providers/gemini/responses.go
index 187d939aa..eddc69bc2 100644
--- a/core/providers/gemini/responses.go
+++ b/core/providers/gemini/responses.go
@@ -8,6 +8,7 @@ import (
"time"
"github.com/bytedance/sonic"
+ providerUtils "github.com/maximhq/bifrost/core/providers/utils"
"github.com/maximhq/bifrost/core/schemas"
)
@@ -2026,28 +2027,53 @@ func (r *GeminiGenerationRequest) convertParamsToGenerationConfigResponses(param
config.ThinkingConfig = &GenerationConfigThinkingConfig{
IncludeThoughts: true,
}
- // only set thinking level if max tokens is not set
- if params.Reasoning.Effort != nil && params.Reasoning.MaxTokens == nil {
- switch *params.Reasoning.Effort {
- case "none":
- // turn off thinking
- config.ThinkingConfig.IncludeThoughts = false
- config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
- case "minimal", "low":
- config.ThinkingConfig.ThinkingLevel = ThinkingLevelLow
- case "medium", "high":
- config.ThinkingConfig.ThinkingLevel = ThinkingLevelHigh
- }
+
+ // Get max tokens for conversions
+ maxTokens := DefaultCompletionMaxTokens
+ if config.MaxOutputTokens > 0 {
+ maxTokens = int(config.MaxOutputTokens)
}
- if params.Reasoning.MaxTokens != nil {
- switch *params.Reasoning.MaxTokens {
- case 0: // turn off thinking
+ minBudget := DefaultReasoningMinBudget
+
+ hasMaxTokens := params.Reasoning.MaxTokens != nil
+ hasEffort := params.Reasoning.Effort != nil
+ supportsLevel := isGemini3Plus(r.Model) // Check if model is 3.0+
+
+ // PRIORITY RULE: If both max_tokens and effort are present, use ONLY max_tokens (budget)
+ // This ensures we send only thinkingBudget to Gemini, not thinkingLevel
+
+ // Handle "none" effort explicitly (only if max_tokens not present)
+ if !hasMaxTokens && hasEffort && *params.Reasoning.Effort == "none" {
+ config.ThinkingConfig.IncludeThoughts = false
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
+ } else if hasMaxTokens {
+ // User provided max_tokens - use thinkingBudget (all Gemini models support this)
+ // If both max_tokens and effort are present, we ignore effort and use ONLY max_tokens
+ budget := *params.Reasoning.MaxTokens
+ switch budget {
+ case 0:
config.ThinkingConfig.IncludeThoughts = false
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
- case -1: // dynamic thinking budget
- config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(-1))
- default: // constrained thinking budget
- config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(*params.Reasoning.MaxTokens))
+ case DynamicReasoningBudget: // Special case: -1 means dynamic budget
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(DynamicReasoningBudget))
+ default:
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budget))
+ }
+ } else if hasEffort {
+ // User provided effort only (no max_tokens)
+ if supportsLevel {
+ // Gemini 3.0+ - use thinkingLevel (more native)
+ config.ThinkingConfig.ThinkingLevel = schemas.Ptr(effortToThinkingLevel(*params.Reasoning.Effort, r.Model))
+ } else {
+ // Gemini < 3.0 - must convert effort to budget
+ budgetTokens, err := providerUtils.GetBudgetTokensFromReasoningEffort(
+ *params.Reasoning.Effort,
+ minBudget,
+ maxTokens,
+ )
+ if err == nil {
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budgetTokens))
+ }
}
}
}
diff --git a/core/providers/gemini/types.go b/core/providers/gemini/types.go
index 534a63449..f0c5ae953 100644
--- a/core/providers/gemini/types.go
+++ b/core/providers/gemini/types.go
@@ -14,6 +14,11 @@ import (
"github.com/maximhq/bifrost/core/schemas"
)
+const MinReasoningMaxTokens = 1 // Minimum max tokens for reasoning - used for estimation of effort level
+const DefaultCompletionMaxTokens = 8192 // Default max output tokens for Gemini - used for relative reasoning max token calculation
+const DefaultReasoningMinBudget = 1024 // Default minimum reasoning budget for Gemini
+const DynamicReasoningBudget = -1 // Special value for dynamic reasoning budget in Gemini
+
type Role string
const (
@@ -914,7 +919,7 @@ type GenerationConfigThinkingConfig struct {
ThinkingBudget *int32 `json:"thinkingBudget,omitempty"`
// Optional. Indicates the thinking level.
- ThinkingLevel ThinkingLevel `json:"thinkingLevel,omitempty"`
+ ThinkingLevel *string `json:"thinkingLevel,omitempty"`
}
// Gemini API supports Camel case but genai sdk sends thinking fields as snake_case
@@ -922,12 +927,12 @@ type GenerationConfigThinkingConfig struct {
func (tc *GenerationConfigThinkingConfig) UnmarshalJSON(data []byte) error {
// Define an auxiliary struct with both camelCase and snake_case tags
type Alias struct {
- IncludeThoughts *bool `json:"includeThoughts"`
- IncludeThoughtsSnake *bool `json:"include_thoughts"`
- ThinkingBudget *int32 `json:"thinkingBudget"`
- ThinkingBudgetSnake *int32 `json:"thinking_budget"`
- ThinkingLevel *ThinkingLevel `json:"thinkingLevel"`
- ThinkingLevelSnake *ThinkingLevel `json:"thinking_level"`
+ IncludeThoughts *bool `json:"includeThoughts"`
+ IncludeThoughtsSnake *bool `json:"include_thoughts"`
+ ThinkingBudget *int32 `json:"thinkingBudget"`
+ ThinkingBudgetSnake *int32 `json:"thinking_budget"`
+ ThinkingLevel *string `json:"thinkingLevel"`
+ ThinkingLevelSnake *string `json:"thinking_level"`
}
var aux Alias
@@ -949,22 +954,14 @@ func (tc *GenerationConfigThinkingConfig) UnmarshalJSON(data []byte) error {
}
if aux.ThinkingLevel != nil {
- tc.ThinkingLevel = *aux.ThinkingLevel
+ tc.ThinkingLevel = aux.ThinkingLevel
} else if aux.ThinkingLevelSnake != nil {
- tc.ThinkingLevel = *aux.ThinkingLevelSnake
+ tc.ThinkingLevel = aux.ThinkingLevelSnake
}
return nil
}
-type ThinkingLevel string
-
-const (
- ThinkingLevelUnspecified ThinkingLevel = "THINKING_LEVEL_UNSPECIFIED"
- ThinkingLevelLow ThinkingLevel = "LOW"
- ThinkingLevelHigh ThinkingLevel = "HIGH"
-)
-
type GeminiBatchEmbeddingRequest struct {
Requests []GeminiEmbeddingRequest `json:"requests,omitempty"`
}
diff --git a/core/providers/gemini/utils.go b/core/providers/gemini/utils.go
index c303e90e2..55fca2f47 100644
--- a/core/providers/gemini/utils.go
+++ b/core/providers/gemini/utils.go
@@ -6,9 +6,64 @@ import (
"strings"
"github.com/bytedance/sonic"
+ providerUtils "github.com/maximhq/bifrost/core/providers/utils"
"github.com/maximhq/bifrost/core/schemas"
)
+// isGemini3Plus returns true if the model is Gemini 3.0 or higher
+// Uses simple string operations for hot path performance
+func isGemini3Plus(model string) bool {
+ // Convert to lowercase for case-insensitive comparison
+ model = strings.ToLower(model)
+
+ // Find "gemini-" prefix
+ idx := strings.Index(model, "gemini-")
+ if idx == -1 {
+ return false
+ }
+
+ // Get the part after "gemini-"
+ afterPrefix := model[idx+7:] // len("gemini-") = 7
+ if len(afterPrefix) == 0 {
+ return false
+ }
+
+	// Check first character - it must be a digit '3'-'9' to count as 3.0+
+	firstChar := afterPrefix[0]
+	return firstChar >= '3' && firstChar <= '9'
+}
+
+// effortToThinkingLevel converts reasoning effort to Gemini ThinkingLevel string
+// Pro models only support "low" or "high"
+// Other models support "minimal", "low", "medium", and "high"
+func effortToThinkingLevel(effort string, model string) string {
+ isPro := strings.Contains(strings.ToLower(model), "pro")
+
+ switch effort {
+ case "none":
+ return "" // Empty string for no thinking
+ case "minimal":
+ if isPro {
+ return "low" // Pro models don't support minimal, use low
+ }
+ return "minimal"
+ case "low":
+ return "low"
+ case "medium":
+ if isPro {
+ return "high" // Pro models don't support medium, use high
+ }
+ return "medium"
+ case "high":
+ return "high"
+ default:
+ if isPro {
+ return "high"
+ }
+ return "medium"
+ }
+}
+
func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters() *schemas.ResponsesParameters {
params := &schemas.ResponsesParameters{
ExtraParams: make(map[string]interface{}),
@@ -36,23 +91,56 @@ func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters()
if strings.Contains(r.Model, "openai") {
params.Reasoning.Summary = schemas.Ptr("auto")
}
- if config.ThinkingConfig.ThinkingLevel != ThinkingLevelUnspecified {
- switch config.ThinkingConfig.ThinkingLevel {
- case ThinkingLevelLow:
- params.Reasoning.Effort = schemas.Ptr("low")
- case ThinkingLevelHigh:
- params.Reasoning.Effort = schemas.Ptr("high")
- }
+
+ // Determine max tokens for conversions
+ maxTokens := DefaultCompletionMaxTokens
+ if config.MaxOutputTokens > 0 {
+ maxTokens = int(config.MaxOutputTokens)
}
+ minBudget := DefaultReasoningMinBudget
+
+ // Priority: Budget first (if present), then Level
if config.ThinkingConfig.ThinkingBudget != nil {
- params.Reasoning.MaxTokens = schemas.Ptr(int(*config.ThinkingConfig.ThinkingBudget))
- switch *config.ThinkingConfig.ThinkingBudget {
+ // Budget is set - use it directly
+ budget := int(*config.ThinkingConfig.ThinkingBudget)
+ params.Reasoning.MaxTokens = schemas.Ptr(budget)
+
+ // Also provide effort for compatibility
+ effort := providerUtils.GetReasoningEffortFromBudgetTokens(budget, minBudget, maxTokens)
+ params.Reasoning.Effort = schemas.Ptr(effort)
+
+ // Handle special cases
+ switch budget {
case 0:
params.Reasoning.Effort = schemas.Ptr("none")
- case -1:
- // dynamic thinking budget
- params.Reasoning.Effort = schemas.Ptr("medium")
- params.Reasoning.MaxTokens = schemas.Ptr(-1)
+ case DynamicReasoningBudget:
+ params.Reasoning.Effort = schemas.Ptr("medium") // dynamic
+ }
+ } else if config.ThinkingConfig.ThinkingLevel != nil && *config.ThinkingConfig.ThinkingLevel != "" {
+ // Level is set (only on 3.0+) - convert to effort and budget
+ level := *config.ThinkingConfig.ThinkingLevel
+ var effort string
+
+ // Map Gemini thinking level to Bifrost effort
+	switch strings.ToLower(level) {
+ case "minimal":
+ effort = "minimal"
+ case "low":
+ effort = "low"
+ case "medium":
+ effort = "medium"
+ case "high":
+ effort = "high"
+ default:
+ effort = "medium"
+ }
+
+ params.Reasoning.Effort = schemas.Ptr(effort)
+
+ // Also convert to budget for compatibility
+ if effort != "none" {
+ budget, _ := providerUtils.GetBudgetTokensFromReasoningEffort(effort, minBudget, maxTokens)
+ params.Reasoning.MaxTokens = schemas.Ptr(budget)
}
}
}
@@ -357,7 +445,7 @@ func convertGeminiUsageMetadataToResponsesUsage(metadata *GenerateContentRespons
}
// convertParamsToGenerationConfig converts Bifrost parameters to Gemini GenerationConfig
-func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseModalities []string) GenerationConfig {
+func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseModalities []string, model string) GenerationConfig {
config := GenerationConfig{}
// Add response modalities if specified
@@ -396,14 +484,54 @@ func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseMod
config.ThinkingConfig = &GenerationConfigThinkingConfig{
IncludeThoughts: true,
}
- if params.Reasoning.MaxTokens != nil {
- config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(*params.Reasoning.MaxTokens))
- } else if params.Reasoning.Effort != nil {
- switch *params.Reasoning.Effort {
- case "minimal", "low":
- config.ThinkingConfig.ThinkingLevel = ThinkingLevelLow
- case "medium", "high":
- config.ThinkingConfig.ThinkingLevel = ThinkingLevelHigh
+
+ // Get max tokens for conversions
+ maxTokens := DefaultCompletionMaxTokens
+ if config.MaxOutputTokens > 0 {
+ maxTokens = int(config.MaxOutputTokens)
+ }
+ minBudget := DefaultReasoningMinBudget
+
+ hasMaxTokens := params.Reasoning.MaxTokens != nil
+ hasEffort := params.Reasoning.Effort != nil
+ supportsLevel := isGemini3Plus(model) // Check if model is 3.0+
+
+ // PRIORITY RULE: If both max_tokens and effort are present, use ONLY max_tokens (budget)
+ // This ensures we send only thinkingBudget to Gemini, not thinkingLevel
+
+ // Handle "none" effort explicitly (only if max_tokens not present)
+ if !hasMaxTokens && hasEffort && *params.Reasoning.Effort == "none" {
+ config.ThinkingConfig.IncludeThoughts = false
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
+ } else if hasMaxTokens {
+ // User provided max_tokens - use thinkingBudget (all Gemini models support this)
+ // If both max_tokens and effort are present, we ignore effort and use ONLY max_tokens
+ budget := *params.Reasoning.MaxTokens
+ switch budget {
+ case 0:
+ config.ThinkingConfig.IncludeThoughts = false
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
+ case DynamicReasoningBudget: // Special case: -1 means dynamic budget
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(DynamicReasoningBudget))
+ default:
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budget))
+ }
+ } else if hasEffort {
+ // User provided effort only (no max_tokens)
+ if supportsLevel {
+ // Gemini 3.0+ - use thinkingLevel (more native)
+ level := effortToThinkingLevel(*params.Reasoning.Effort, model)
+ config.ThinkingConfig.ThinkingLevel = &level
+ } else {
+ // Gemini < 3.0 - must convert effort to budget
+ budgetTokens, err := providerUtils.GetBudgetTokensFromReasoningEffort(
+ *params.Reasoning.Effort,
+ minBudget,
+ maxTokens,
+ )
+ if err == nil {
+ config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budgetTokens))
+ }
}
}
}
diff --git a/docs/providers/reasoning.mdx b/docs/providers/reasoning.mdx
index b7f35853e..76df40fff 100644
--- a/docs/providers/reasoning.mdx
+++ b/docs/providers/reasoning.mdx
@@ -21,7 +21,8 @@ Bifrost normalizes all provider-specific reasoning formats to a consistent OpenA
| OpenAI | `reasoning` | `reasoning_details` | None | `minimal`, `low`, `medium`, `high` | ✅ |
| Anthropic | `thinking` | Content blocks | **1024 tokens** | `enabled` only | ✅ |
| Bedrock (Anthropic) | `thinking` | Content blocks | **1024 tokens** | `enabled` only | ✅ |
-| Gemini | `thinking_config` | `thought` parts | None | `off`, `low`, `medium`, `high` | ✅ |
+| Gemini 2.5+ | `thinking_config` | `thought` parts | 1024 | Budget-only | ✅ |
+| Gemini 3.0+ | `thinking_config` | `thought` parts | 1024 | `minimal`, `low`, `medium`, `high` + Budget | ✅ |
---
@@ -534,24 +535,49 @@ chatReq := &schemas.BifrostChatRequest{
### Gemini
-Gemini uses `thinking_config` with effort-based configuration.
+Gemini uses `thinking_config` with dual support for both token budgets and effort levels, depending on the model version.
+
+#### Model Version Support
+
+| Gemini Version | `thinkingBudget` | `thinkingLevel` | Notes |
+|----------------|------------------|-----------------|-------|
+| **2.5+** | ✅ | ❌ | Budget-only models |
+| **3.0+** | ✅ | ✅ | Support both budget and level |
+
+
+**Important**: Only ONE parameter (`thinkingBudget` or `thinkingLevel`) should be sent to Gemini at a time. When both `reasoning.max_tokens` and `reasoning.effort` are provided in a Bifrost request, `max_tokens` takes priority and is converted to `thinkingBudget`.
+
+
+#### Priority Rules
+
+When both `reasoning.max_tokens` and `reasoning.effort` are present:
+
+```
+1. If max_tokens is provided → USE thinkingBudget (ignores effort)
+2. Else if effort is provided:
+ - Gemini 3.0+ → USE thinkingLevel (more native)
+ - Gemini 2.5 → CONVERT effort to thinkingBudget
+3. Else → disable reasoning
+```
-
+
```json
-// Bifrost Request
+// Bifrost Request - Both fields provided
{
+ "model": "gemini-3.0-flash",
"reasoning": {
- "effort": "high",
- "max_tokens": 4096
+ "effort": "high", // Ignored
+ "max_tokens": 4096 // Takes priority
}
}
-// Gemini Request
+// Gemini 3.0+ Request - Only budget sent
{
"generation_config": {
"thinking_config": {
+ "include_thoughts": true,
"thinking_budget": 4096
}
}
@@ -559,10 +585,142 @@ Gemini uses `thinking_config` with effort-based configuration.
```
-
+
+
+```json
+// Bifrost Request - Effort only
+{
+ "model": "gemini-3.0-flash",
+ "reasoning": {
+ "effort": "high"
+ }
+}
+
+// Gemini 3.0+ Request - Converted to level
+{
+ "generation_config": {
+ "thinking_config": {
+ "include_thoughts": true,
+ "thinking_level": "high"
+ }
+ }
+}
+```
+
+
+
+
+```json
+// Bifrost Request - Effort only
+{
+ "model": "gemini-2.5-flash",
+ "max_completion_tokens": 4096,
+ "reasoning": {
+ "effort": "high"
+ }
+}
+
+// Gemini 2.5 Request - Converted to budget
+// Calculation: 1024 + (0.80 × (4096 - 1024)) = 3482
+{
+ "generation_config": {
+ "thinking_config": {
+ "include_thoughts": true,
+ "thinking_budget": 3482
+ }
+ }
+}
+```
+
+
+
+
+#### Model-Specific Level Conversions
+
+Gemini Pro models have stricter constraints on thinking levels:
+
+| Bifrost Effort | Non-Pro Models | Pro Models | Notes |
+|----------------|----------------|------------|-------|
+| `"none"` | Empty string | Empty string | Disables thinking |
+| `"minimal"` | `"minimal"` | `"low"` | Pro doesn't support minimal |
+| `"low"` | `"low"` | `"low"` | Supported on all |
+| `"medium"` | `"medium"` | `"high"` | Pro doesn't support medium |
+| `"high"` | `"high"` | `"high"` | Supported on all |
+
+**Example**:
+```go
+// For "gemini-3.0-flash-thinking-exp" (non-Pro)
+effort: "medium" → thinkingLevel: "medium"
+
+// For "gemini-3.0-pro" (Pro model)
+effort: "medium" → thinkingLevel: "high" // Converted up
+```
+
+#### Special Values
+
+| Value | Field | Behavior | Use Case |
+|-------|-------|----------|----------|
+| `0` | `max_tokens` | `thinking_budget: 0`, `include_thoughts: false` | Explicitly disable reasoning |
+| `-1` | `max_tokens` | `thinking_budget: -1` | **Dynamic budget** (Gemini decides) |
+| `"none"` | `effort` | `thinking_budget: 0`, `include_thoughts: false` | Disable reasoning |
+
+
+
+
+```json
+// Bifrost Request - Dynamic budget
+{
+ "reasoning": {
+ "max_tokens": -1
+ }
+}
+
+// Gemini Request - Sent as-is
+{
+ "generation_config": {
+ "thinking_config": {
+ "include_thoughts": true,
+ "thinking_budget": -1
+ }
+ }
+}
+```
+
+
+
+
+```json
+// Bifrost Request - Method 1
+{
+ "reasoning": {
+ "max_tokens": 0
+ }
+}
+
+// Bifrost Request - Method 2
+{
+ "reasoning": {
+ "effort": "none"
+ }
+}
+
+// Gemini Request - Both become
+{
+ "generation_config": {
+ "thinking_config": {
+ "include_thoughts": false,
+ "thinking_budget": 0
+ }
+ }
+}
+```
+
+
+
```go
// Using Bifrost Go SDK with Gemini
+// Example 1: Dynamic budget
chatReq := &schemas.BifrostChatRequest{
Provider: schemas.Gemini,
Model: "gemini-2.0-flash-thinking-exp-1219",
@@ -570,21 +728,45 @@ chatReq := &schemas.BifrostChatRequest{
Params: &schemas.ChatParameters{
MaxCompletionTokens: schemas.Ptr(4096),
Reasoning: &schemas.ChatReasoning{
- MaxTokens: schemas.Ptr(4096), // Gemini native field
+ MaxTokens: schemas.Ptr(-1), // Let Gemini decide
},
},
}
-// Bifrost converts to Gemini format:
-// generation_config: {
-// thinking_config: {
-// thinking_budget: 4096
-// }
-// }
+// Example 2: Effort-based for Gemini 3.0+
+chatReq := &schemas.BifrostChatRequest{
+ Provider: schemas.Gemini,
+ Model: "gemini-3.0-flash",
+ Input: messages,
+ Params: &schemas.ChatParameters{
+ MaxCompletionTokens: schemas.Ptr(4096),
+ Reasoning: &schemas.ChatReasoning{
+ Effort: schemas.Ptr("high"), // Converts to thinkingLevel
+ },
+ },
+}
+
+// Example 3: Budget-based (all versions)
+chatReq := &schemas.BifrostChatRequest{
+ Provider: schemas.Gemini,
+ Model: "gemini-2.5-flash",
+ Input: messages,
+ Params: &schemas.ChatParameters{
+ MaxCompletionTokens: schemas.Ptr(4096),
+ Reasoning: &schemas.ChatReasoning{
+ MaxTokens: schemas.Ptr(3000), // Direct budget
+ },
+ },
+}
```
-
+
+
+#### Response Conversion
+
+
+
```json
// Gemini Response
@@ -609,6 +791,7 @@ chatReq := &schemas.BifrostChatRequest{
"choices": [{
"message": {
"content": "The answer is 42.",
+ "reasoning": "Analyzing the problem...",
"reasoning_details": [{
"index": 0,
"type": "text",
@@ -620,7 +803,7 @@ chatReq := &schemas.BifrostChatRequest{
```
-
+
```go
// After calling Bifrost Chat Completions with Gemini
@@ -633,7 +816,10 @@ if err != nil {
choice := resp.Choices[0]
message := choice.Message
-// Access reasoning blocks
+// Access combined reasoning text
+fmt.Printf("Reasoning: %s\n", message.Reasoning)
+
+// Access detailed reasoning blocks
for i, details := range message.ReasoningDetails {
if details.Type == "text" {
fmt.Printf("Thinking block %d:\n%s\n", i, details.Text)
@@ -647,16 +833,34 @@ fmt.Printf("Answer:\n%s\n", message.Content)
-**Effort Level Mapping**:
+#### Conversion Summary
+
+**Bifrost → Gemini (Request)**:
-| Bifrost Effort | Gemini Mode |
-|----------------|-------------|
-| Not set | `off` |
-| `low` | Uses budget |
-| `medium` | Uses budget |
-| `high` | Uses budget |
+| Input | Gemini 2.5 | Gemini 3.0+ | Note |
+|-------|------------|-------------|------|
+| `max_tokens: 4096` | `thinking_budget: 4096` | `thinking_budget: 4096` | Direct pass-through |
+| `max_tokens: -1` | `thinking_budget: -1` | `thinking_budget: -1` | Dynamic budget |
+| `max_tokens: 0` | `thinking_budget: 0` | `thinking_budget: 0` | Disabled |
+| `effort: "high"` only | `thinking_budget: 3482`* | `thinking_level: "high"` | Estimated or native |
+| `effort: "medium"` only | `thinking_budget: 2330`* | `thinking_level: "medium"` or `"high"`** | Estimated or native |
+| Both `effort` + `max_tokens` | Uses `max_tokens` | Uses `max_tokens` | Priority rule |
-**Code Reference**: `core/providers/gemini/chat.go`
+\* Assumes `max_completion_tokens: 4096` (as in the example above), uses estimation formula
+\*\* Pro models convert `"medium"` to `"high"`
+
+**Gemini → Bifrost (Response)**:
+
+| Gemini Field | Bifrost Field | Conversion |
+|--------------|---------------|------------|
+| `thinking_budget` | `reasoning.max_tokens` | Direct mapping |
+| `thinking_level` | `reasoning.effort` | Level → effort mapping |
+| `thought: true` parts | `reasoning_details[]` | Array of reasoning blocks |
+
+**Code References**:
+- `core/providers/gemini/utils.go` (Chat Completions)
+- `core/providers/gemini/responses.go` (Responses API)
+- `core/providers/gemini/types.go` (Constants)
---
@@ -879,7 +1083,7 @@ Different providers have different constraints on reasoning budget:
| Bedrock Anthropic | `core/providers/bedrock/types.go` | **1024** | Same as Anthropic |
| Bedrock Nova | `core/providers/bedrock/types.go` | 1 | More flexible |
| Cohere | `core/providers/cohere/types.go` | 1 | Flexible |
-| Gemini | `core/providers/gemini/types.go` | 1 | Flexible |
+| Gemini | `core/providers/gemini/types.go` | 1024 | Default minimum for conversions |
### Default Completion Tokens (for ratio calculation)
@@ -887,7 +1091,8 @@ When `max_completion_tokens` is not provided, these defaults are used for ratio
| Provider | Default | File |
|----------|---------|------|
-| All providers | 4096 | `core/providers/*/types.go` |
+| OpenAI, Anthropic, Cohere, Bedrock | 4096 | `core/providers/*/types.go` |
+| Gemini | 8192 | `core/providers/gemini/types.go` |
---
@@ -1256,6 +1461,27 @@ data: {"type": "content_block_stop"}
**Impact**: Cannot disable thinking once reasoning param is present
+
+**Severity**: Medium
+**Behavior**: When both `effort` and `max_tokens` are provided, only `thinkingBudget` is sent to Gemini (effort is dropped)
+**Impact**: Effort value is completely ignored when max_tokens is present
+**Workaround**: Provide only the parameter you want to use
+
+
+
+**Severity**: Medium
+**Behavior**: Gemini 2.5 only supports `thinkingBudget`, while 3.0+ supports both `thinkingBudget` and `thinkingLevel`
+**Impact**: Effort-only requests on 2.5 are converted to budget; on 3.0+ they use native levels
+**Note**: Bifrost automatically detects version and uses appropriate conversion
+
+
+
+**Severity**: Low
+**Behavior**: Pro models only support "low" and "high" thinking levels
+**Impact**: `"minimal"` → `"low"`, `"medium"` → `"high"` for Pro models
+**Note**: Non-Pro models support all four levels: minimal, low, medium, high
+
+
---
## Complete Provider Comparison
@@ -1268,7 +1494,8 @@ data: {"type": "content_block_stop"}
| Anthropic | Thinking blocks | Token budget | **1024** | ✅ |
| Bedrock (Anthropic) | Reasoning config | Token budget | **1024** | ✅ |
| Bedrock (Nova) | Reasoning config | Effort-based | None | ❌ |
-| Gemini | Thinking config | Token-based | None | ✅ |
+| Gemini 2.5+ | Thinking config | Token budget | 1024 | ✅ |
+| Gemini 3.0+ | Thinking config | Dual (budget + level) | 1024 | ✅ |
### Parameter Support
@@ -1278,7 +1505,8 @@ data: {"type": "content_block_stop"}
| Anthropic | ❌ (binary) | ✅ | ✅ | ✅ |
| Bedrock (Anthropic) | ❌ (binary) | ✅ | ✅ | ✅ |
| Bedrock (Nova) | ✅ (3 levels) | ⚠️ (ignored) | ❌ | ✅ |
-| Gemini | ✅ (implicit) | ✅ | ❌ | ✅ |
+| Gemini 2.5+ | ⚠️ (converts to budget) | ✅ | ❌ | ✅ |
+| Gemini 3.0+ | ✅ (4 levels) | ✅ | ❌ | ✅ |
---
diff --git a/transports/changelog.md b/transports/changelog.md
index eec331ec5..ea2fc1984 100644
--- a/transports/changelog.md
+++ b/transports/changelog.md
@@ -1,4 +1,5 @@
- feat: added support for multiple types in gemini and anthropic structured outputs properties
- fix: added missing logs filter checks in ui for live updates
- fix: ensure request ID is consistently set in context before PreHooks are executed
-- docs: updated docs for xai provider
\ No newline at end of file
+- docs: updated docs for xai provider
+- fix: correct conversion of thinking level to thinking budget and vice versa in gemini
\ No newline at end of file