Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion core/changelog.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
- feat: added support for multiple types in gemini and anthropic structured outputs properties
- fix: ensure request ID is consistently set in context before PreHooks are executed
- fix: correct conversion of thinking level to thinking budget and vice versa in gemini
2 changes: 1 addition & 1 deletion core/providers/gemini/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func ToGeminiChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) *Gemi

// Convert parameters to generation config
if bifrostReq.Params != nil {
geminiReq.GenerationConfig = convertParamsToGenerationConfig(bifrostReq.Params, []string{})
geminiReq.GenerationConfig = convertParamsToGenerationConfig(bifrostReq.Params, []string{}, bifrostReq.Model)

// Handle tool-related parameters
if len(bifrostReq.Params.Tools) > 0 {
Expand Down
64 changes: 45 additions & 19 deletions core/providers/gemini/responses.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/bytedance/sonic"
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
"github.com/maximhq/bifrost/core/schemas"
)

Expand Down Expand Up @@ -2026,28 +2027,53 @@ func (r *GeminiGenerationRequest) convertParamsToGenerationConfigResponses(param
config.ThinkingConfig = &GenerationConfigThinkingConfig{
IncludeThoughts: true,
}
// only set thinking level if max tokens is not set
if params.Reasoning.Effort != nil && params.Reasoning.MaxTokens == nil {
switch *params.Reasoning.Effort {
case "none":
// turn off thinking
config.ThinkingConfig.IncludeThoughts = false
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
case "minimal", "low":
config.ThinkingConfig.ThinkingLevel = ThinkingLevelLow
case "medium", "high":
config.ThinkingConfig.ThinkingLevel = ThinkingLevelHigh
}

// Get max tokens for conversions
maxTokens := DefaultCompletionMaxTokens
if config.MaxOutputTokens > 0 {
maxTokens = int(config.MaxOutputTokens)
}
if params.Reasoning.MaxTokens != nil {
switch *params.Reasoning.MaxTokens {
case 0: // turn off thinking
minBudget := DefaultReasoningMinBudget

hasMaxTokens := params.Reasoning.MaxTokens != nil
hasEffort := params.Reasoning.Effort != nil
supportsLevel := isGemini3Plus(r.Model) // Check if model is 3.0+

// PRIORITY RULE: If both max_tokens and effort are present, use ONLY max_tokens (budget)
// This ensures we send only thinkingBudget to Gemini, not thinkingLevel

// Handle "none" effort explicitly (only if max_tokens not present)
if !hasMaxTokens && hasEffort && *params.Reasoning.Effort == "none" {
config.ThinkingConfig.IncludeThoughts = false
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
} else if hasMaxTokens {
// User provided max_tokens - use thinkingBudget (all Gemini models support this)
// If both max_tokens and effort are present, we ignore effort and use ONLY max_tokens
budget := *params.Reasoning.MaxTokens
switch budget {
case 0:
config.ThinkingConfig.IncludeThoughts = false
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
case -1: // dynamic thinking budget
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(-1))
default: // constrained thinking budget
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(*params.Reasoning.MaxTokens))
case DynamicReasoningBudget: // Special case: -1 means dynamic budget
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(DynamicReasoningBudget))
default:
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budget))
}
} else if hasEffort {
// User provided effort only (no max_tokens)
if supportsLevel {
// Gemini 3.0+ - use thinkingLevel (more native)
config.ThinkingConfig.ThinkingLevel = schemas.Ptr(effortToThinkingLevel(*params.Reasoning.Effort, r.Model))
} else {
// Gemini < 3.0 - must convert effort to budget
budgetTokens, err := providerUtils.GetBudgetTokensFromReasoningEffort(
*params.Reasoning.Effort,
minBudget,
maxTokens,
)
if err == nil {
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budgetTokens))
}
}
}
}
Expand Down
31 changes: 14 additions & 17 deletions core/providers/gemini/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ import (
"github.com/maximhq/bifrost/core/schemas"
)

const MinReasoningMaxTokens = 1 // Minimum max tokens for reasoning - used for estimation of effort level
const DefaultCompletionMaxTokens = 8192 // Default max output tokens for Gemini - used for relative reasoning max token calculation
const DefaultReasoningMinBudget = 1024 // Default minimum reasoning budget for Gemini
const DynamicReasoningBudget = -1 // Special value for dynamic reasoning budget in Gemini

type Role string

const (
Expand Down Expand Up @@ -914,20 +919,20 @@ type GenerationConfigThinkingConfig struct {
ThinkingBudget *int32 `json:"thinkingBudget,omitempty"`

// Optional. Indicates the thinking level.
ThinkingLevel ThinkingLevel `json:"thinkingLevel,omitempty"`
ThinkingLevel *string `json:"thinkingLevel,omitempty"`
}

// Gemini API supports Camel case but genai sdk sends thinking fields as snake_case
// UnmarshalJSON implements custom JSON unmarshaling to support both camelCase and snake_case
func (tc *GenerationConfigThinkingConfig) UnmarshalJSON(data []byte) error {
// Define an auxiliary struct with both camelCase and snake_case tags
type Alias struct {
IncludeThoughts *bool `json:"includeThoughts"`
IncludeThoughtsSnake *bool `json:"include_thoughts"`
ThinkingBudget *int32 `json:"thinkingBudget"`
ThinkingBudgetSnake *int32 `json:"thinking_budget"`
ThinkingLevel *ThinkingLevel `json:"thinkingLevel"`
ThinkingLevelSnake *ThinkingLevel `json:"thinking_level"`
IncludeThoughts *bool `json:"includeThoughts"`
IncludeThoughtsSnake *bool `json:"include_thoughts"`
ThinkingBudget *int32 `json:"thinkingBudget"`
ThinkingBudgetSnake *int32 `json:"thinking_budget"`
ThinkingLevel *string `json:"thinkingLevel"`
ThinkingLevelSnake *string `json:"thinking_level"`
}

var aux Alias
Expand All @@ -949,22 +954,14 @@ func (tc *GenerationConfigThinkingConfig) UnmarshalJSON(data []byte) error {
}

if aux.ThinkingLevel != nil {
tc.ThinkingLevel = *aux.ThinkingLevel
tc.ThinkingLevel = aux.ThinkingLevel
} else if aux.ThinkingLevelSnake != nil {
tc.ThinkingLevel = *aux.ThinkingLevelSnake
tc.ThinkingLevel = aux.ThinkingLevelSnake
}

return nil
}

type ThinkingLevel string

const (
ThinkingLevelUnspecified ThinkingLevel = "THINKING_LEVEL_UNSPECIFIED"
ThinkingLevelLow ThinkingLevel = "LOW"
ThinkingLevelHigh ThinkingLevel = "HIGH"
)

type GeminiBatchEmbeddingRequest struct {
Requests []GeminiEmbeddingRequest `json:"requests,omitempty"`
}
Expand Down
172 changes: 150 additions & 22 deletions core/providers/gemini/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,64 @@ import (
"strings"

"github.com/bytedance/sonic"
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
"github.com/maximhq/bifrost/core/schemas"
)

// isGemini3Plus reports whether the model name refers to Gemini 3.0 or higher.
// Uses simple string operations for hot path performance.
func isGemini3Plus(model string) bool {
	// Case-insensitive match against the "gemini-" family prefix.
	model = strings.ToLower(model)

	// Locate the "gemini-" marker anywhere in the name (handles vendor
	// prefixes such as "models/gemini-3-pro").
	idx := strings.Index(model, "gemini-")
	if idx == -1 {
		return false
	}

	// Slice off everything up to and including "gemini-".
	afterPrefix := model[idx+7:] // len("gemini-") == 7
	if len(afterPrefix) == 0 {
		return false
	}

	// The major version must start with a digit. A bare `>= '3'` byte
	// comparison would wrongly match non-numeric suffixes such as
	// "gemini-pro" or "gemini-exp" (ASCII letters sort above '9').
	firstChar := afterPrefix[0]
	return firstChar >= '3' && firstChar <= '9'
}

// effortToThinkingLevel maps a Bifrost reasoning effort value onto the Gemini
// thinkingLevel string.
// Pro models only support "low" or "high"; other models additionally accept
// "minimal" and "medium".
func effortToThinkingLevel(effort string, model string) string {
	// "none" always disables thinking regardless of model family.
	if effort == "none" {
		return "" // empty string means no thinking level
	}

	proModel := strings.Contains(strings.ToLower(model), "pro")

	if proModel {
		// Pro models collapse the four-level scale onto low/high.
		switch effort {
		case "minimal", "low":
			return "low"
		default: // "medium", "high", and anything unrecognized
			return "high"
		}
	}

	// Non-pro models pass recognized levels through unchanged.
	switch effort {
	case "minimal", "low", "medium", "high":
		return effort
	default:
		return "medium" // unrecognized effort falls back to medium
	}
}

func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters() *schemas.ResponsesParameters {
params := &schemas.ResponsesParameters{
ExtraParams: make(map[string]interface{}),
Expand Down Expand Up @@ -36,23 +91,56 @@ func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters()
if strings.Contains(r.Model, "openai") {
params.Reasoning.Summary = schemas.Ptr("auto")
}
if config.ThinkingConfig.ThinkingLevel != ThinkingLevelUnspecified {
switch config.ThinkingConfig.ThinkingLevel {
case ThinkingLevelLow:
params.Reasoning.Effort = schemas.Ptr("low")
case ThinkingLevelHigh:
params.Reasoning.Effort = schemas.Ptr("high")
}

// Determine max tokens for conversions
maxTokens := DefaultCompletionMaxTokens
if config.MaxOutputTokens > 0 {
maxTokens = int(config.MaxOutputTokens)
}
minBudget := DefaultReasoningMinBudget

// Priority: Budget first (if present), then Level
if config.ThinkingConfig.ThinkingBudget != nil {
params.Reasoning.MaxTokens = schemas.Ptr(int(*config.ThinkingConfig.ThinkingBudget))
switch *config.ThinkingConfig.ThinkingBudget {
// Budget is set - use it directly
budget := int(*config.ThinkingConfig.ThinkingBudget)
params.Reasoning.MaxTokens = schemas.Ptr(budget)

// Also provide effort for compatibility
effort := providerUtils.GetReasoningEffortFromBudgetTokens(budget, minBudget, maxTokens)
params.Reasoning.Effort = schemas.Ptr(effort)

// Handle special cases
switch budget {
case 0:
params.Reasoning.Effort = schemas.Ptr("none")
case -1:
// dynamic thinking budget
params.Reasoning.Effort = schemas.Ptr("medium")
params.Reasoning.MaxTokens = schemas.Ptr(-1)
case DynamicReasoningBudget:
params.Reasoning.Effort = schemas.Ptr("medium") // dynamic
}
} else if config.ThinkingConfig.ThinkingLevel != nil && *config.ThinkingConfig.ThinkingLevel != "" {
// Level is set (only on 3.0+) - convert to effort and budget
level := *config.ThinkingConfig.ThinkingLevel
var effort string

// Map Gemini thinking level to Bifrost effort
switch level {
case "minimal":
effort = "minimal"
case "low":
effort = "low"
case "medium":
effort = "medium"
case "high":
effort = "high"
default:
effort = "medium"
}

params.Reasoning.Effort = schemas.Ptr(effort)

// Also convert to budget for compatibility
if effort != "none" {
budget, _ := providerUtils.GetBudgetTokensFromReasoningEffort(effort, minBudget, maxTokens)
params.Reasoning.MaxTokens = schemas.Ptr(budget)
}
}
}
Expand Down Expand Up @@ -357,7 +445,7 @@ func convertGeminiUsageMetadataToResponsesUsage(metadata *GenerateContentRespons
}

// convertParamsToGenerationConfig converts Bifrost parameters to Gemini GenerationConfig
func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseModalities []string) GenerationConfig {
func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseModalities []string, model string) GenerationConfig {
config := GenerationConfig{}

// Add response modalities if specified
Expand Down Expand Up @@ -396,14 +484,54 @@ func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseMod
config.ThinkingConfig = &GenerationConfigThinkingConfig{
IncludeThoughts: true,
}
if params.Reasoning.MaxTokens != nil {
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(*params.Reasoning.MaxTokens))
} else if params.Reasoning.Effort != nil {
switch *params.Reasoning.Effort {
case "minimal", "low":
config.ThinkingConfig.ThinkingLevel = ThinkingLevelLow
case "medium", "high":
config.ThinkingConfig.ThinkingLevel = ThinkingLevelHigh

// Get max tokens for conversions
maxTokens := DefaultCompletionMaxTokens
if config.MaxOutputTokens > 0 {
maxTokens = int(config.MaxOutputTokens)
}
minBudget := DefaultReasoningMinBudget

hasMaxTokens := params.Reasoning.MaxTokens != nil
hasEffort := params.Reasoning.Effort != nil
supportsLevel := isGemini3Plus(model) // Check if model is 3.0+

// PRIORITY RULE: If both max_tokens and effort are present, use ONLY max_tokens (budget)
// This ensures we send only thinkingBudget to Gemini, not thinkingLevel

// Handle "none" effort explicitly (only if max_tokens not present)
if !hasMaxTokens && hasEffort && *params.Reasoning.Effort == "none" {
config.ThinkingConfig.IncludeThoughts = false
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
} else if hasMaxTokens {
// User provided max_tokens - use thinkingBudget (all Gemini models support this)
// If both max_tokens and effort are present, we ignore effort and use ONLY max_tokens
budget := *params.Reasoning.MaxTokens
switch budget {
case 0:
config.ThinkingConfig.IncludeThoughts = false
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(0))
case DynamicReasoningBudget: // Special case: -1 means dynamic budget
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(DynamicReasoningBudget))
default:
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budget))
}
} else if hasEffort {
// User provided effort only (no max_tokens)
if supportsLevel {
// Gemini 3.0+ - use thinkingLevel (more native)
level := effortToThinkingLevel(*params.Reasoning.Effort, model)
config.ThinkingConfig.ThinkingLevel = &level
} else {
// Gemini < 3.0 - must convert effort to budget
budgetTokens, err := providerUtils.GetBudgetTokensFromReasoningEffort(
*params.Reasoning.Effort,
minBudget,
maxTokens,
)
if err == nil {
config.ThinkingConfig.ThinkingBudget = schemas.Ptr(int32(budgetTokens))
}
}
}
}
Expand Down
Loading