diff --git a/core/providers/anthropic/anthropic.go b/core/providers/anthropic/anthropic.go index 427742edd..7fa24ef2a 100644 --- a/core/providers/anthropic/anthropic.go +++ b/core/providers/anthropic/anthropic.go @@ -304,7 +304,7 @@ func (provider *AnthropicProvider) ChatCompletion(ctx context.Context, key schem jsonData, err := providerUtils.CheckContextAndGetRequestBody( ctx, request, - func() (any, error) { return ToAnthropicChatRequest(request), nil }, + func() (any, error) { return ToAnthropicChatRequest(request) }, provider.GetProviderKey()) if err != nil { return nil, err @@ -355,10 +355,11 @@ func (provider *AnthropicProvider) ChatCompletionStream(ctx context.Context, pos ctx, request, func() (any, error) { - reqBody := ToAnthropicChatRequest(request) - if reqBody != nil { - reqBody.Stream = schemas.Ptr(true) + reqBody, err := ToAnthropicChatRequest(request) + if err != nil { + return nil, err } + reqBody.Stream = schemas.Ptr(true) return reqBody, nil }, provider.GetProviderKey()) @@ -655,7 +656,7 @@ func (provider *AnthropicProvider) Responses(ctx context.Context, key schemas.Ke jsonData, err := providerUtils.CheckContextAndGetRequestBody( ctx, request, - func() (any, error) { return ToAnthropicResponsesRequest(request), nil }, + func() (any, error) { return ToAnthropicResponsesRequest(request) }, provider.GetProviderKey()) if err != nil { return nil, err @@ -704,10 +705,11 @@ func (provider *AnthropicProvider) ResponsesStream(ctx context.Context, postHook ctx, request, func() (any, error) { - reqBody := ToAnthropicResponsesRequest(request) - if reqBody != nil { - reqBody.Stream = schemas.Ptr(true) + reqBody, err := ToAnthropicResponsesRequest(request) + if err != nil { + return nil, err } + reqBody.Stream = schemas.Ptr(true) return reqBody, nil }, provider.GetProviderKey()) diff --git a/core/providers/anthropic/chat.go b/core/providers/anthropic/chat.go index 742a1e5f5..c2e0929d3 100644 --- a/core/providers/anthropic/chat.go +++ 
b/core/providers/anthropic/chat.go @@ -8,384 +8,11 @@ import ( "github.com/maximhq/bifrost/core/schemas" ) -// ToBifrostChatRequest converts an Anthropic messages request to Bifrost format -func (request *AnthropicMessageRequest) ToBifrostChatRequest() *schemas.BifrostChatRequest { - provider, model := schemas.ParseModelString(request.Model, schemas.Anthropic) - - bifrostReq := &schemas.BifrostChatRequest{ - Provider: provider, - Model: model, - Fallbacks: schemas.ParseFallbacks(request.Fallbacks), - } - - messages := []schemas.ChatMessage{} - - // Add system message if present - if request.System != nil { - if request.System.ContentStr != nil && *request.System.ContentStr != "" { - messages = append(messages, schemas.ChatMessage{ - Role: schemas.ChatMessageRoleSystem, - Content: &schemas.ChatMessageContent{ - ContentStr: request.System.ContentStr, - }, - }) - } else if request.System.ContentBlocks != nil { - contentBlocks := []schemas.ChatContentBlock{} - for _, block := range request.System.ContentBlocks { - if block.Text != nil { // System messages will only have text content - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeText, - Text: block.Text, - }) - } - } - messages = append(messages, schemas.ChatMessage{ - Role: schemas.ChatMessageRoleSystem, - Content: &schemas.ChatMessageContent{ - ContentBlocks: contentBlocks, - }, - }) - } - } - - // Convert messages - for _, msg := range request.Messages { - if msg.Content.ContentStr != nil { - // Simple text message - bifrostMsg := schemas.ChatMessage{ - Role: schemas.ChatMessageRole(msg.Role), - Content: &schemas.ChatMessageContent{ - ContentStr: msg.Content.ContentStr, - }, - } - messages = append(messages, bifrostMsg) - } else if msg.Content.ContentBlocks != nil { - // Check if this is a user message with multiple tool results - var toolResults []AnthropicContentBlock - var nonToolContent []AnthropicContentBlock - - for _, content := range 
msg.Content.ContentBlocks { - if content.Type == AnthropicContentBlockTypeToolResult { - toolResults = append(toolResults, content) - } else { - nonToolContent = append(nonToolContent, content) - } - } - - // If we have tool results, create separate messages for each - if len(toolResults) > 0 { - for _, toolResult := range toolResults { - if toolResult.ToolUseID != nil { - var contentBlocks []schemas.ChatContentBlock - - // Convert tool result content - if toolResult.Content.ContentStr != nil { - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeText, - Text: toolResult.Content.ContentStr, - }) - } else if toolResult.Content.ContentBlocks != nil { - for _, block := range toolResult.Content.ContentBlocks { - if block.Text != nil { - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeText, - Text: block.Text, - }) - } else if block.Source != nil { - contentBlocks = append(contentBlocks, block.ToBifrostContentImageBlock()) - } - } - } - - toolMsg := schemas.ChatMessage{ - Role: schemas.ChatMessageRoleTool, - ChatToolMessage: &schemas.ChatToolMessage{ - ToolCallID: toolResult.ToolUseID, - }, - Content: &schemas.ChatMessageContent{ - ContentBlocks: contentBlocks, - }, - } - messages = append(messages, toolMsg) - } - } - } - - // Handle non-tool content (regular user/assistant message) - if len(nonToolContent) > 0 { - var bifrostMsg schemas.ChatMessage - bifrostMsg.Role = schemas.ChatMessageRole(msg.Role) - - var toolCalls []schemas.ChatAssistantMessageToolCall - var contentBlocks []schemas.ChatContentBlock - - for _, content := range nonToolContent { - switch content.Type { - case AnthropicContentBlockTypeText: - if content.Text != nil { - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeText, - Text: content.Text, - }) - } - case AnthropicContentBlockTypeImage: - if content.Source != nil { - contentBlocks = 
append(contentBlocks, content.ToBifrostContentImageBlock()) - } - case AnthropicContentBlockTypeToolUse: - if content.ID != nil && content.Name != nil { - tc := schemas.ChatAssistantMessageToolCall{ - Index: uint16(len(toolCalls)), - Type: schemas.Ptr(string(schemas.ChatToolChoiceTypeFunction)), - ID: content.ID, - Function: schemas.ChatAssistantMessageToolCallFunction{ - Name: content.Name, - Arguments: schemas.JsonifyInput(content.Input), - }, - } - toolCalls = append(toolCalls, tc) - } - } - } - - // Set content - if len(contentBlocks) > 0 { - bifrostMsg.Content = &schemas.ChatMessageContent{ - ContentBlocks: contentBlocks, - } - } - - // Set tool calls for assistant messages - if len(toolCalls) > 0 && msg.Role == AnthropicMessageRoleAssistant { - bifrostMsg.ChatAssistantMessage = &schemas.ChatAssistantMessage{ - ToolCalls: toolCalls, - } - } - - messages = append(messages, bifrostMsg) - } - } - } - - bifrostReq.Input = messages - - // Convert parameters - if request.MaxTokens > 0 || request.Temperature != nil || request.TopP != nil || request.TopK != nil || request.StopSequences != nil || request.OutputFormat != nil { - params := &schemas.ChatParameters{ - ExtraParams: make(map[string]interface{}), - } - - if request.MaxTokens > 0 { - params.MaxCompletionTokens = &request.MaxTokens - } - if request.Temperature != nil { - params.Temperature = request.Temperature - } - if request.TopP != nil { - params.TopP = request.TopP - } - if request.TopK != nil { - params.ExtraParams["top_k"] = *request.TopK - } - if request.StopSequences != nil { - params.Stop = request.StopSequences - } - if request.OutputFormat != nil { - params.ResponseFormat = &request.OutputFormat - } - - bifrostReq.Params = params - } - - // Convert tools - if request.Tools != nil { - tools := []schemas.ChatTool{} - for _, tool := range request.Tools { - // Convert input_schema to FunctionParameters - params := schemas.ToolFunctionParameters{ - Type: "object", - } - if tool.InputSchema != nil { - 
params.Type = tool.InputSchema.Type - params.Required = tool.InputSchema.Required - params.Properties = tool.InputSchema.Properties - } - - tools = append(tools, schemas.ChatTool{ - Type: schemas.ChatToolTypeFunction, - Function: &schemas.ChatToolFunction{ - Name: tool.Name, - Description: tool.Description, - Parameters: &params, - }, - }) - } - if bifrostReq.Params == nil { - bifrostReq.Params = &schemas.ChatParameters{} - } - bifrostReq.Params.Tools = tools - } - - // Convert tool choice - if request.ToolChoice != nil { - if bifrostReq.Params == nil { - bifrostReq.Params = &schemas.ChatParameters{} - } - toolChoice := &schemas.ChatToolChoice{ - ChatToolChoiceStruct: &schemas.ChatToolChoiceStruct{ - Type: func() schemas.ChatToolChoiceType { - if request.ToolChoice.Type == "tool" { - return schemas.ChatToolChoiceTypeFunction - } - return schemas.ChatToolChoiceType(request.ToolChoice.Type) - }(), - }, - } - if request.ToolChoice.Type == "tool" && request.ToolChoice.Name != "" { - toolChoice.ChatToolChoiceStruct.Function = schemas.ChatToolChoiceFunction{ - Name: request.ToolChoice.Name, - } - } - bifrostReq.Params.ToolChoice = toolChoice - } - - return bifrostReq -} - -// ToBifrostChatResponse converts an Anthropic message response to Bifrost format -func (response *AnthropicMessageResponse) ToBifrostChatResponse() *schemas.BifrostChatResponse { - if response == nil { - return nil - } - - // Initialize Bifrost response - bifrostResponse := &schemas.BifrostChatResponse{ - ID: response.ID, - Model: response.Model, - ExtraFields: schemas.BifrostResponseExtraFields{ - RequestType: schemas.ChatCompletionRequest, - Provider: schemas.Anthropic, - }, - Created: int(time.Now().Unix()), - } - - // Collect all content and tool calls into a single message - var toolCalls []schemas.ChatAssistantMessageToolCall - var contentBlocks []schemas.ChatContentBlock - var contentStr *string - - // Process content and tool calls - if response.Content != nil { - if len(response.Content) == 1 && 
response.Content[0].Type == AnthropicContentBlockTypeText { - contentStr = response.Content[0].Text - } else { - for _, c := range response.Content { - switch c.Type { - case AnthropicContentBlockTypeText: - if c.Text != nil { - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeText, - Text: c.Text, - }) - } - case AnthropicContentBlockTypeToolUse: - if c.ID != nil && c.Name != nil { - function := schemas.ChatAssistantMessageToolCallFunction{ - Name: c.Name, - } - - // Marshal the input to JSON string - if c.Input != nil { - args, err := json.Marshal(c.Input) - if err != nil { - function.Arguments = fmt.Sprintf("%v", c.Input) - } else { - function.Arguments = string(args) - } - } else { - function.Arguments = "{}" - } - - toolCalls = append(toolCalls, schemas.ChatAssistantMessageToolCall{ - Index: uint16(len(toolCalls)), - Type: schemas.Ptr(string(schemas.ChatToolTypeFunction)), - ID: c.ID, - Function: function, - }) - } - } - } - } - } - - // Create a single choice with the collected content - // Create message content - messageContent := schemas.ChatMessageContent{ - ContentStr: contentStr, - ContentBlocks: contentBlocks, - } - - // Create the assistant message - var assistantMessage *schemas.ChatAssistantMessage - - // Create AssistantMessage if we have tool calls or thinking - if len(toolCalls) > 0 { - assistantMessage = &schemas.ChatAssistantMessage{ - ToolCalls: toolCalls, - } - } - - // Create message - message := schemas.ChatMessage{ - Role: schemas.ChatMessageRoleAssistant, - Content: &messageContent, - ChatAssistantMessage: assistantMessage, - } - - // Create choice - choice := schemas.BifrostResponseChoice{ - Index: 0, - ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ - Message: &message, - StopString: response.StopSequence, - }, - FinishReason: func() *string { - if response.StopReason != "" { - mapped := ConvertAnthropicFinishReasonToBifrost(response.StopReason) - return &mapped - } - 
return nil - }(), - } - - bifrostResponse.Choices = []schemas.BifrostResponseChoice{choice} - - // Convert usage information - if response.Usage != nil { - bifrostResponse.Usage = &schemas.BifrostLLMUsage{ - PromptTokens: response.Usage.InputTokens, - PromptTokensDetails: &schemas.ChatPromptTokensDetails{ - CachedTokens: response.Usage.CacheCreationInputTokens + response.Usage.CacheReadInputTokens, - }, - CompletionTokens: response.Usage.OutputTokens, - TotalTokens: response.Usage.InputTokens + response.Usage.OutputTokens, - } - if response.Usage.CacheCreationInputTokens > 0 { - if bifrostResponse.Usage.CompletionTokensDetails == nil { - bifrostResponse.Usage.CompletionTokensDetails = &schemas.ChatCompletionTokensDetails{} - } - bifrostResponse.Usage.CompletionTokensDetails.CachedTokens = response.Usage.CacheCreationInputTokens - } - } - - return bifrostResponse -} - // ToAnthropicChatRequest converts a Bifrost request to Anthropic format // This is the reverse of ConvertChatRequestToBifrost for provider-side usage -func ToAnthropicChatRequest(bifrostReq *schemas.BifrostChatRequest) *AnthropicMessageRequest { +func ToAnthropicChatRequest(bifrostReq *schemas.BifrostChatRequest) (*AnthropicMessageRequest, error) { if bifrostReq == nil || bifrostReq.Input == nil { - return nil + return nil, fmt.Errorf("bifrost request is nil or input is nil") } messages := bifrostReq.Input @@ -468,6 +95,26 @@ func ToAnthropicChatRequest(bifrostReq *schemas.BifrostChatRequest) *AnthropicMe } anthropicReq.ToolChoice = toolChoice } + + // Convert reasoning + if bifrostReq.Params.Reasoning != nil { + if bifrostReq.Params.Reasoning.Effort != nil && *bifrostReq.Params.Reasoning.Effort == "none" { + anthropicReq.Thinking = &AnthropicThinking{ + Type: "disabled", + } + } else { + if bifrostReq.Params.Reasoning.MaxTokens == nil { + return nil, fmt.Errorf("reasoning.max_tokens is required for reasoning") + } else if *bifrostReq.Params.Reasoning.MaxTokens < MinimumReasoningMaxTokens { + return 
nil, fmt.Errorf("reasoning.max_tokens must be greater than or equal to %d", MinimumReasoningMaxTokens) + } else { + anthropicReq.Thinking = &AnthropicThinking{ + Type: "enabled", + BudgetTokens: bifrostReq.Params.Reasoning.MaxTokens, + } + } + } + } } // Convert messages - group consecutive tool messages into single user messages @@ -557,6 +204,17 @@ func ToAnthropicChatRequest(bifrostReq *schemas.BifrostChatRequest) *AnthropicMe var content []AnthropicContentBlock + // First add reasoning details + if msg.ChatAssistantMessage != nil && msg.ChatAssistantMessage.ReasoningDetails != nil { + for _, reasoningDetail := range msg.ChatAssistantMessage.ReasoningDetails { + content = append(content, AnthropicContentBlock{ + Type: AnthropicContentBlockTypeThinking, + Signature: reasoningDetail.Signature, + Thinking: reasoningDetail.Text, + }) + } + } + if msg.Content != nil { // Convert text content if msg.Content.ContentStr != nil { @@ -616,11 +274,164 @@ func ToAnthropicChatRequest(bifrostReq *schemas.BifrostChatRequest) *AnthropicMe anthropicReq.Messages = anthropicMessages anthropicReq.System = systemContent - return anthropicReq + return anthropicReq, nil +} + +// ToBifrostChatResponse converts an Anthropic message response to Bifrost format +func (response *AnthropicMessageResponse) ToBifrostChatResponse() *schemas.BifrostChatResponse { + if response == nil { + return nil + } + + // Initialize Bifrost response + bifrostResponse := &schemas.BifrostChatResponse{ + ID: response.ID, + Model: response.Model, + ExtraFields: schemas.BifrostResponseExtraFields{ + RequestType: schemas.ChatCompletionRequest, + Provider: schemas.Anthropic, + }, + Created: int(time.Now().Unix()), + } + + // Collect all content and tool calls into a single message + var toolCalls []schemas.ChatAssistantMessageToolCall + var contentBlocks []schemas.ChatContentBlock + var reasoningDetails []schemas.ChatReasoningDetails + var reasoningText string + var contentStr *string + + // Process content and 
tool calls + if response.Content != nil { + if len(response.Content) == 1 && response.Content[0].Type == AnthropicContentBlockTypeText { + contentStr = response.Content[0].Text + } else { + for _, c := range response.Content { + switch c.Type { + case AnthropicContentBlockTypeText: + if c.Text != nil { + contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ + Type: schemas.ChatContentBlockTypeText, + Text: c.Text, + }) + } + case AnthropicContentBlockTypeToolUse: + if c.ID != nil && c.Name != nil { + function := schemas.ChatAssistantMessageToolCallFunction{ + Name: c.Name, + } + + // Marshal the input to JSON string + if c.Input != nil { + args, err := json.Marshal(c.Input) + if err != nil { + function.Arguments = fmt.Sprintf("%v", c.Input) + } else { + function.Arguments = string(args) + } + } else { + function.Arguments = "{}" + } + + toolCalls = append(toolCalls, schemas.ChatAssistantMessageToolCall{ + Index: uint16(len(toolCalls)), + Type: schemas.Ptr(string(schemas.ChatToolTypeFunction)), + ID: c.ID, + Function: function, + }) + } + case AnthropicContentBlockTypeThinking: + reasoningDetails = append(reasoningDetails, schemas.ChatReasoningDetails{ + Index: len(reasoningDetails), + Type: schemas.BifrostReasoningDetailsTypeText, + Text: c.Thinking, + Signature: c.Signature, + }) + if c.Thinking != nil { + reasoningText += *c.Thinking + "\n" + } + } + } + } + } + + if len(contentBlocks) == 1 && contentBlocks[0].Type == schemas.ChatContentBlockTypeText { + contentStr = contentBlocks[0].Text + contentBlocks = nil + } + + // Create a single choice with the collected content + // Create message content + messageContent := schemas.ChatMessageContent{ + ContentStr: contentStr, + ContentBlocks: contentBlocks, + } + + // Create the assistant message + var assistantMessage *schemas.ChatAssistantMessage + + // Create AssistantMessage if we have tool calls or thinking + if len(toolCalls) > 0 { + assistantMessage = &schemas.ChatAssistantMessage{ + ToolCalls: 
toolCalls, + } + } + + if len(reasoningDetails) > 0 { + if assistantMessage == nil { + assistantMessage = &schemas.ChatAssistantMessage{} + } + assistantMessage.ReasoningDetails = reasoningDetails + if reasoningText != "" { + assistantMessage.Reasoning = &reasoningText + } + } + + // Create message + message := schemas.ChatMessage{ + Role: schemas.ChatMessageRoleAssistant, + Content: &messageContent, + ChatAssistantMessage: assistantMessage, + } + + // Create choice + choice := schemas.BifrostResponseChoice{ + Index: 0, + ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ + Message: &message, + StopString: response.StopSequence, + }, + FinishReason: func() *string { + if response.StopReason != "" { + mapped := ConvertAnthropicFinishReasonToBifrost(response.StopReason) + return &mapped + } + return nil + }(), + } + + bifrostResponse.Choices = []schemas.BifrostResponseChoice{choice} + + // Convert usage information + if response.Usage != nil { + bifrostResponse.Usage = &schemas.BifrostLLMUsage{ + PromptTokens: response.Usage.InputTokens, + PromptTokensDetails: &schemas.ChatPromptTokensDetails{ + CachedTokens: response.Usage.CacheReadInputTokens, + }, + CompletionTokens: response.Usage.OutputTokens, + CompletionTokensDetails: &schemas.ChatCompletionTokensDetails{ + CachedTokens: response.Usage.CacheCreationInputTokens, + }, + TotalTokens: response.Usage.InputTokens + response.Usage.OutputTokens, + } + } + + return bifrostResponse } -// ToAnthropicChatCompletionResponse converts a Bifrost response to Anthropic format -func ToAnthropicChatCompletionResponse(bifrostResp *schemas.BifrostChatResponse) *AnthropicMessageResponse { +// ToAnthropicChatResponse converts a Bifrost response to Anthropic format +func ToAnthropicChatResponse(bifrostResp *schemas.BifrostChatResponse) *AnthropicMessageResponse { if bifrostResp == nil { return nil } @@ -656,17 +467,32 @@ func ToAnthropicChatCompletionResponse(bifrostResp *schemas.BifrostChatResponse) if 
choice.FinishReason != nil { anthropicResp.StopReason = ConvertBifrostFinishReasonToAnthropic(*choice.FinishReason) } - if choice.StopString != nil { + if choice.ChatNonStreamResponseChoice != nil && choice.StopString != nil { anthropicResp.StopSequence = choice.StopString } + // Add reasoning content + if choice.ChatNonStreamResponseChoice != nil && choice.Message != nil && choice.Message.ChatAssistantMessage != nil && choice.Message.ChatAssistantMessage.ReasoningDetails != nil { + for _, reasoningDetail := range choice.Message.ChatAssistantMessage.ReasoningDetails { + if reasoningDetail.Type == schemas.BifrostReasoningDetailsTypeText && reasoningDetail.Text != nil && + ((reasoningDetail.Text != nil && *reasoningDetail.Text != "") || + (reasoningDetail.Signature != nil && *reasoningDetail.Signature != "")) { + content = append(content, AnthropicContentBlock{ + Type: AnthropicContentBlockTypeThinking, + Thinking: reasoningDetail.Text, + Signature: reasoningDetail.Signature, + }) + } + } + } + // Add text content - if choice.Message.Content.ContentStr != nil && *choice.Message.Content.ContentStr != "" { + if choice.ChatNonStreamResponseChoice != nil && choice.Message != nil && choice.Message.Content != nil && choice.Message.Content.ContentStr != nil && *choice.Message.Content.ContentStr != "" { content = append(content, AnthropicContentBlock{ Type: AnthropicContentBlockTypeText, Text: choice.Message.Content.ContentStr, }) - } else if choice.Message.Content.ContentBlocks != nil { + } else if choice.ChatNonStreamResponseChoice != nil && choice.Message != nil && choice.Message.Content != nil && choice.Message.Content.ContentBlocks != nil { for _, block := range choice.Message.Content.ContentBlocks { if block.Text != nil { content = append(content, AnthropicContentBlock{ @@ -678,7 +504,7 @@ func ToAnthropicChatCompletionResponse(bifrostResp *schemas.BifrostChatResponse) } // Add tool calls as tool_use content - if choice.Message.ChatAssistantMessage != nil && 
choice.Message.ChatAssistantMessage.ToolCalls != nil { + if choice.ChatNonStreamResponseChoice != nil && choice.Message != nil && choice.Message.ChatAssistantMessage != nil && choice.Message.ChatAssistantMessage.ToolCalls != nil { for _, toolCall := range choice.Message.ChatAssistantMessage.ToolCalls { // Parse arguments JSON string back to map var input map[string]interface{} @@ -812,7 +638,14 @@ func (chunk *AnthropicStreamEvent) ToBifrostChatCompletionStream() (*schemas.Bif Index: 0, ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ Delta: &schemas.ChatStreamResponseChoiceDelta{ - Thought: chunk.Delta.Thinking, + Reasoning: chunk.Delta.Thinking, + ReasoningDetails: []schemas.ChatReasoningDetails{ + { + Index: 0, + Type: schemas.BifrostReasoningDetailsTypeText, + Text: chunk.Delta.Thinking, + }, + }, }, }, }, @@ -823,9 +656,29 @@ func (chunk *AnthropicStreamEvent) ToBifrostChatCompletionStream() (*schemas.Bif } case AnthropicStreamDeltaTypeSignature: - // Handle signature verification for thinking content - // This is used to verify the integrity of thinking content - + if chunk.Delta.Signature != nil && *chunk.Delta.Signature != "" { + // Create streaming response for signature delta + streamResponse := &schemas.BifrostChatResponse{ + Object: "chat.completion.chunk", + Choices: []schemas.BifrostResponseChoice{ + { + Index: 0, + ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ + Delta: &schemas.ChatStreamResponseChoiceDelta{ + ReasoningDetails: []schemas.ChatReasoningDetails{ + { + Index: 0, + Type: schemas.BifrostReasoningDetailsTypeText, + Signature: chunk.Delta.Signature, + }, + }, + }, + }, + }, + }, + } + return streamResponse, nil, false + } } } @@ -858,8 +711,8 @@ func (chunk *AnthropicStreamEvent) ToBifrostChatCompletionStream() (*schemas.Bif return nil, nil, false } -// ToAnthropicChatCompletionStreamResponse converts a Bifrost streaming response to Anthropic SSE string format -func 
ToAnthropicChatCompletionStreamResponse(bifrostResp *schemas.BifrostChatResponse) string { +// ToAnthropicChatStreamResponse converts a Bifrost streaming response to Anthropic SSE string format +func ToAnthropicChatStreamResponse(bifrostResp *schemas.BifrostChatResponse) string { if bifrostResp == nil { return "" } @@ -882,13 +735,21 @@ func ToAnthropicChatCompletionStreamResponse(bifrostResp *schemas.BifrostChatRes Type: AnthropicStreamDeltaTypeText, Text: delta.Content, } - } else if delta.Thought != nil { + } else if delta.Reasoning != nil { // Handle thinking content deltas streamResp.Type = "content_block_delta" streamResp.Index = &choice.Index streamResp.Delta = &AnthropicStreamDelta{ Type: AnthropicStreamDeltaTypeThinking, - Thinking: delta.Thought, + Thinking: delta.Reasoning, + } + } else if len(delta.ReasoningDetails) > 0 && delta.ReasoningDetails[0].Signature != nil && *delta.ReasoningDetails[0].Signature != "" { + // Handle signature deltas + streamResp.Type = "content_block_delta" + streamResp.Index = &choice.Index + streamResp.Delta = &AnthropicStreamDelta{ + Type: AnthropicStreamDeltaTypeSignature, + Signature: delta.ReasoningDetails[0].Signature, } } else if len(delta.ToolCalls) > 0 { // Handle tool call deltas @@ -990,8 +851,8 @@ func ToAnthropicChatCompletionStreamResponse(bifrostResp *schemas.BifrostChatRes return fmt.Sprintf("event: %s\ndata: %s\n\n", streamResp.Type, jsonData) } -// ToAnthropicChatCompletionStreamError converts a BifrostError to Anthropic streaming error in SSE format -func ToAnthropicChatCompletionStreamError(bifrostErr *schemas.BifrostError) string { +// ToAnthropicChatStreamError converts a BifrostError to Anthropic streaming error in SSE format +func ToAnthropicChatStreamError(bifrostErr *schemas.BifrostError) string { errorResp := ToAnthropicChatCompletionError(bifrostErr) if errorResp == nil { return "" @@ -1004,27 +865,3 @@ func ToAnthropicChatCompletionStreamError(bifrostErr *schemas.BifrostError) stri // Format as 
Anthropic SSE error event return fmt.Sprintf("event: error\ndata: %s\n\n", jsonData) } - -// ToAnthropicChatCompletionError converts a BifrostError to AnthropicMessageError -func ToAnthropicChatCompletionError(bifrostErr *schemas.BifrostError) *AnthropicMessageError { - if bifrostErr == nil { - return nil - } - - // Provide blank strings for nil pointer fields - errorType := "" - if bifrostErr.Type != nil { - errorType = *bifrostErr.Type - } - - // Handle nested error fields with nil checks - errorStruct := AnthropicMessageErrorStruct{ - Type: errorType, - Message: bifrostErr.Error.Message, - } - - return &AnthropicMessageError{ - Type: "error", // always "error" for Anthropic - Error: errorStruct, - } -} diff --git a/core/providers/anthropic/errors.go b/core/providers/anthropic/errors.go index aa3331707..58ea8fcc9 100644 --- a/core/providers/anthropic/errors.go +++ b/core/providers/anthropic/errors.go @@ -6,6 +6,36 @@ import ( "github.com/valyala/fasthttp" ) +// ToAnthropicChatCompletionError converts a BifrostError to AnthropicMessageError +func ToAnthropicChatCompletionError(bifrostErr *schemas.BifrostError) *AnthropicMessageError { + if bifrostErr == nil { + return nil + } + + // Provide blank strings for nil pointer fields + errorType := "" + if bifrostErr.Type != nil { + errorType = *bifrostErr.Type + } + + // Safely extract message from nested error + message := "" + if bifrostErr.Error != nil { + message = bifrostErr.Error.Message + } + + // Handle nested error fields with nil checks + errorStruct := AnthropicMessageErrorStruct{ + Type: errorType, + Message: message, + } + + return &AnthropicMessageError{ + Type: "error", // always "error" for Anthropic + Error: errorStruct, + } +} + func parseAnthropicError(resp *fasthttp.Response) *schemas.BifrostError { var errorResp AnthropicError bifrostErr := providerUtils.HandleProviderAPIError(resp, &errorResp) diff --git a/core/providers/anthropic/responses.go b/core/providers/anthropic/responses.go index 
47d13a4a7..e92bdd5e3 100644 --- a/core/providers/anthropic/responses.go +++ b/core/providers/anthropic/responses.go @@ -187,6 +187,18 @@ func (request *AnthropicMessageRequest) ToBifrostResponsesRequest() *schemas.Bif if request.OutputFormat != nil { params.Text = convertAnthropicOutputFormatToResponsesTextConfig(request.OutputFormat) } + if request.Thinking != nil { + if request.Thinking.Type == "enabled" { + params.Reasoning = &schemas.ResponsesParametersReasoning{ + Effort: schemas.Ptr("auto"), + MaxTokens: request.Thinking.BudgetTokens, + } + } else { + params.Reasoning = &schemas.ResponsesParametersReasoning{ + Effort: schemas.Ptr("none"), + } + } + } // Add trucation parameter if computer tool is being used if provider == schemas.OpenAI && request.Tools != nil { @@ -281,7 +293,11 @@ func (request *AnthropicMessageRequest) ToBifrostResponsesRequest() *schemas.Bif } // ToAnthropicResponsesRequest converts a BifrostRequest with Responses structure back to AnthropicMessageRequest -func ToAnthropicResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) *AnthropicMessageRequest { +func ToAnthropicResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) (*AnthropicMessageRequest, error) { + if bifrostReq == nil { + return nil, fmt.Errorf("bifrost request is nil") + } + anthropicReq := &AnthropicMessageRequest{ Model: bifrostReq.Model, MaxTokens: AnthropicDefaultMaxTokens, @@ -306,6 +322,28 @@ func ToAnthropicResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) *A if bifrostReq.Params.Text != nil { anthropicReq.OutputFormat = convertResponsesTextConfigToAnthropicOutputFormat(bifrostReq.Params.Text) } + if bifrostReq.Params.Reasoning != nil { + if bifrostReq.Params.Reasoning.Effort != nil { + if *bifrostReq.Params.Reasoning.Effort != "none" { + if bifrostReq.Params.Reasoning.MaxTokens != nil { + if *bifrostReq.Params.Reasoning.MaxTokens < MinimumReasoningMaxTokens { + return nil, fmt.Errorf("reasoning.max_tokens must be greater than or equal to 
%d", MinimumReasoningMaxTokens) + } else { + anthropicReq.Thinking = &AnthropicThinking{ + Type: "enabled", + BudgetTokens: bifrostReq.Params.Reasoning.MaxTokens, + } + } + } else { + return nil, fmt.Errorf("reasoning.max_tokens is required for reasoning") + } + } else { + anthropicReq.Thinking = &AnthropicThinking{ + Type: "disabled", + } + } + } + } if bifrostReq.Params.ExtraParams != nil { topK, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["top_k"]) if ok { @@ -314,22 +352,6 @@ func ToAnthropicResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) *A if stop, ok := schemas.SafeExtractStringSlice(bifrostReq.Params.ExtraParams["stop"]); ok { anthropicReq.StopSequences = stop } - if thinking, ok := schemas.SafeExtractFromMap(bifrostReq.Params.ExtraParams, "thinking"); ok { - if anthropicThinking, ok := thinking.(*AnthropicThinking); ok { - anthropicReq.Thinking = anthropicThinking - } else if thinkingMap, ok := thinking.(map[string]interface{}); ok { - anthropicThinking := &AnthropicThinking{} - if thinkingType, ok := thinkingMap["type"].(string); ok { - anthropicThinking.Type = thinkingType - } - // Handle budget_tokens - JSON numbers can be float64 or int - budgetTokens, ok := schemas.SafeExtractInt(thinkingMap["budget_tokens"]) - if ok { - anthropicThinking.BudgetTokens = &budgetTokens - } - anthropicReq.Thinking = anthropicThinking - } - } } // Convert tools @@ -379,7 +401,7 @@ func ToAnthropicResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) *A anthropicReq.Messages = anthropicMessages } - return anthropicReq + return anthropicReq, nil } // ToBifrostResponsesResponse converts an Anthropic response to BifrostResponse with Responses structure @@ -1405,11 +1427,17 @@ func ToAnthropicResponsesStreamError(bifrostErr *schemas.BifrostError) string { return "" } + // Safely extract message from nested error + message := "" + if bifrostErr.Error != nil { + message = bifrostErr.Error.Message + } + streamResp := 
&AnthropicStreamEvent{ Type: AnthropicStreamEventTypeError, Error: &AnthropicStreamError{ Type: "error", - Message: bifrostErr.Error.Message, + Message: message, }, } diff --git a/core/providers/anthropic/types.go b/core/providers/anthropic/types.go index 2f8246e7d..738d219e0 100644 --- a/core/providers/anthropic/types.go +++ b/core/providers/anthropic/types.go @@ -11,6 +11,7 @@ import ( // Since Anthropic always needs to have a max_tokens parameter, we set a default value if not provided. const ( AnthropicDefaultMaxTokens = 4096 + MinimumReasoningMaxTokens = 1024 ) // ==================== REQUEST TYPES ==================== @@ -51,7 +52,7 @@ type AnthropicMessageRequest struct { ToolChoice *AnthropicToolChoice `json:"tool_choice,omitempty"` MCPServers []AnthropicMCPServer `json:"mcp_servers,omitempty"` // This feature requires the beta header: "anthropic-beta": "mcp-client-2025-04-04" Thinking *AnthropicThinking `json:"thinking,omitempty"` - OutputFormat interface{} `json:"output_format,omitempty"` // This feature requires the beta header: "anthropic-beta": "structured-outputs-2025-11-13" and currently only supported for Claude Sonnet 4.5 and Claude Opus 4.1 + OutputFormat interface{} `json:"output_format,omitempty"` // This feature requires the beta header: "anthropic-beta": "structured-outputs-2025-11-13" and currently only supported for Claude Sonnet 4.5 and Claude Opus 4.1 // Bifrost specific field (only parsed when converting from Provider -> Bifrost request) Fallbacks []string `json:"fallbacks,omitempty"` diff --git a/core/providers/azure/azure.go b/core/providers/azure/azure.go index 2655a1c72..c344a7606 100644 --- a/core/providers/azure/azure.go +++ b/core/providers/azure/azure.go @@ -350,7 +350,10 @@ func (provider *AzureProvider) ChatCompletion(ctx context.Context, key schemas.K request, func() (any, error) { if schemas.IsAnthropicModel(deployment) { - reqBody := anthropic.ToAnthropicChatRequest(request) + reqBody, err := 
anthropic.ToAnthropicChatRequest(request) + if err != nil { + return nil, err + } if reqBody != nil { reqBody.Model = deployment } @@ -446,7 +449,10 @@ func (provider *AzureProvider) ChatCompletionStream(ctx context.Context, postHoo ctx, request, func() (any, error) { - reqBody := anthropic.ToAnthropicChatRequest(request) + reqBody, err := anthropic.ToAnthropicChatRequest(request) + if err != nil { + return nil, err + } if reqBody != nil { reqBody.Model = deployment reqBody.Stream = schemas.Ptr(true) @@ -522,7 +528,10 @@ func (provider *AzureProvider) Responses(ctx context.Context, key schemas.Key, r request, func() (any, error) { if schemas.IsAnthropicModel(deployment) { - reqBody := anthropic.ToAnthropicResponsesRequest(request) + reqBody, err := anthropic.ToAnthropicResponsesRequest(request) + if err != nil { + return nil, err + } if reqBody != nil { reqBody.Model = deployment } @@ -619,7 +628,10 @@ func (provider *AzureProvider) ResponsesStream(ctx context.Context, postHookRunn ctx, request, func() (any, error) { - reqBody := anthropic.ToAnthropicResponsesRequest(request) + reqBody, err := anthropic.ToAnthropicResponsesRequest(request) + if err != nil { + return nil, err + } if reqBody != nil { reqBody.Model = deployment reqBody.Stream = schemas.Ptr(true) diff --git a/core/providers/bedrock/bedrock.go b/core/providers/bedrock/bedrock.go index 500cd11fd..6ea273a7a 100644 --- a/core/providers/bedrock/bedrock.go +++ b/core/providers/bedrock/bedrock.go @@ -18,6 +18,7 @@ import ( v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4" "github.com/aws/aws-sdk-go-v2/config" "github.com/bytedance/sonic" + "github.com/google/uuid" "github.com/maximhq/bifrost/core/providers/anthropic" "github.com/maximhq/bifrost/core/providers/cohere" providerUtils "github.com/maximhq/bifrost/core/providers/utils" @@ -757,7 +758,6 @@ func (provider *BedrockProvider) ChatCompletionStream(ctx context.Context, postH defer resp.Body.Close() // Process AWS Event Stream format - var messageID string 
usage := &schemas.BifrostLLMUsage{} var finishReason *string chunkIndex := 0 @@ -768,6 +768,9 @@ func (provider *BedrockProvider) ChatCompletionStream(ctx context.Context, postH decoder := eventstream.NewDecoder() payloadBuf := make([]byte, 0, 1024*1024) // 1MB payload buffer + // Bedrock does not provide a unique identifier for the stream, so we generate one ourselves + id := uuid.New().String() + for { // Decode a single EventStream message message, err := decoder.Decode(resp.Body, payloadBuf) @@ -853,7 +856,7 @@ func (provider *BedrockProvider) ChatCompletionStream(ctx context.Context, postH return } if response != nil { - response.ID = messageID + response.ID = id response.Model = request.Model response.ExtraFields = schemas.BifrostResponseExtraFields{ RequestType: schemas.ChatCompletionStreamRequest, @@ -876,7 +879,7 @@ func (provider *BedrockProvider) ChatCompletionStream(ctx context.Context, postH } // Send final response - response := providerUtils.CreateBifrostChatCompletionChunkResponse(messageID, usage, finishReason, chunkIndex, schemas.ChatCompletionStreamRequest, providerName, request.Model) + response := providerUtils.CreateBifrostChatCompletionChunkResponse(id, usage, finishReason, chunkIndex, schemas.ChatCompletionStreamRequest, providerName, request.Model) response.ExtraFields.ModelDeployment = deployment response.ExtraFields.Latency = time.Since(startTime).Milliseconds() ctx = context.WithValue(ctx, schemas.BifrostContextKeyStreamEndIndicator, true) diff --git a/core/providers/bedrock/chat.go b/core/providers/bedrock/chat.go index d77e252cf..714a2f357 100644 --- a/core/providers/bedrock/chat.go +++ b/core/providers/bedrock/chat.go @@ -36,7 +36,9 @@ func ToBedrockChatCompletionRequest(ctx *context.Context, bifrostReq *schemas.Bi } // Convert parameters and configurations - convertChatParameters(ctx, bifrostReq, bedrockReq) + if err := convertChatParameters(ctx, bifrostReq, bedrockReq); err != nil { + return nil, fmt.Errorf("failed to convert chat 
parameters: %w", err) + } // Ensure tool config is present when needed ensureChatToolConfigForConversation(bifrostReq, bedrockReq) @@ -54,6 +56,8 @@ func (response *BedrockConverseResponse) ToBifrostChatResponse(ctx context.Conte var contentStr *string var contentBlocks []schemas.ChatContentBlock var toolCalls []schemas.ChatAssistantMessageToolCall + var reasoningDetails []schemas.ChatReasoningDetails + var reasoningText string if response.Output.Message != nil { if len(response.Output.Message.Content) == 1 && response.Output.Message.Content[0].Text != nil { @@ -113,11 +117,30 @@ func (response *BedrockConverseResponse) ToBifrostChatResponse(ctx context.Conte }, }) } + + // Handle reasoning content + if contentBlock.ReasoningContent != nil { + if contentBlock.ReasoningContent.ReasoningText == nil { + continue + } + reasoningDetails = append(reasoningDetails, schemas.ChatReasoningDetails{ + Index: len(reasoningDetails), + Type: schemas.BifrostReasoningDetailsTypeText, + Text: schemas.Ptr(contentBlock.ReasoningContent.ReasoningText.Text), + Signature: contentBlock.ReasoningContent.ReasoningText.Signature, + }) + reasoningText += contentBlock.ReasoningContent.ReasoningText.Text + "\n" + } } } } } + if len(contentBlocks) == 1 && contentBlocks[0].Type == schemas.ChatContentBlockTypeText { + contentStr = contentBlocks[0].Text + contentBlocks = nil + } + // Create the message content messageContent := schemas.ChatMessageContent{ ContentStr: contentStr, @@ -131,6 +154,13 @@ func (response *BedrockConverseResponse) ToBifrostChatResponse(ctx context.Conte ToolCalls: toolCalls, } } + if len(reasoningDetails) > 0 { + if assistantMessage == nil { + assistantMessage = &schemas.ChatAssistantMessage{} + } + assistantMessage.ReasoningDetails = reasoningDetails + assistantMessage.Reasoning = schemas.Ptr(reasoningText) + } // Create the response choice choices := []schemas.BifrostResponseChoice{ @@ -300,6 +330,61 @@ func (chunk *BedrockStreamEvent) ToBifrostChatCompletionStream() 
(*schemas.Bifro }, } + return streamResponse, nil, false + + case chunk.Delta.ReasoningContent != nil: + // Handle reasoning content delta + reasoningContentDelta := chunk.Delta.ReasoningContent + + // Only construct and return a response when either Text or Signature is set + if reasoningContentDelta.Text == "" && reasoningContentDelta.Signature == nil { + return nil, nil, false + } + + var streamResponse *schemas.BifrostChatResponse + if reasoningContentDelta.Text != "" { + streamResponse = &schemas.BifrostChatResponse{ + Object: "chat.completion.chunk", + Choices: []schemas.BifrostResponseChoice{ + { + Index: 0, + ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ + Delta: &schemas.ChatStreamResponseChoiceDelta{ + Reasoning: schemas.Ptr(reasoningContentDelta.Text), + ReasoningDetails: []schemas.ChatReasoningDetails{ + { + Index: 0, + Type: schemas.BifrostReasoningDetailsTypeText, + Text: schemas.Ptr(reasoningContentDelta.Text), + }, + }, + }, + }, + }, + }, + } + } else if reasoningContentDelta.Signature != nil { + streamResponse = &schemas.BifrostChatResponse{ + Object: "chat.completion.chunk", + Choices: []schemas.BifrostResponseChoice{ + { + Index: 0, + ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ + Delta: &schemas.ChatStreamResponseChoiceDelta{ + ReasoningDetails: []schemas.ChatReasoningDetails{ + { + Index: 0, + Type: schemas.BifrostReasoningDetailsTypeText, + Signature: reasoningContentDelta.Signature, + }, + }, + }, + }, + }, + }, + } + } + return streamResponse, nil, false } } diff --git a/core/providers/bedrock/responses.go b/core/providers/bedrock/responses.go index e07b8e6b3..d028bff48 100644 --- a/core/providers/bedrock/responses.go +++ b/core/providers/bedrock/responses.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "github.com/maximhq/bifrost/core/providers/anthropic" "github.com/maximhq/bifrost/core/schemas" ) @@ -345,23 +346,25 @@ func (request *BedrockConverseRequest) ToBifrostResponsesRequest() (*schemas.Bif // Convert 
additional model request fields to extra params if len(request.AdditionalModelRequestFields) > 0 { - if bifrostReq.Params.ExtraParams == nil { - bifrostReq.Params.ExtraParams = make(map[string]interface{}) - } - // Convert OrderedMap to map[string]interface{} for ExtraParams - requestFieldsMap := make(map[string]interface{}) - for k, v := range request.AdditionalModelRequestFields { - requestFieldsMap[k] = v - } - bifrostReq.Params.ExtraParams["additionalModelRequestFieldPaths"] = requestFieldsMap - } - - // Convert additional model response field paths to extra params - if len(request.AdditionalModelResponseFieldPaths) > 0 { - if bifrostReq.Params.ExtraParams == nil { - bifrostReq.Params.ExtraParams = make(map[string]interface{}) + reasoningConfig, ok := schemas.SafeExtractFromMap(request.AdditionalModelRequestFields, "reasoning_config") + if ok { + if reasoningConfigMap, ok := reasoningConfig.(map[string]interface{}); ok { + if typeStr, ok := schemas.SafeExtractString(reasoningConfigMap["type"]); ok { + if typeStr == "enabled" { + if maxTokens, ok := schemas.SafeExtractInt(reasoningConfigMap["budget_tokens"]); ok { + bifrostReq.Params.Reasoning = &schemas.ResponsesParametersReasoning{ + Effort: schemas.Ptr("auto"), + MaxTokens: schemas.Ptr(maxTokens), + } + } + } else { + bifrostReq.Params.Reasoning = &schemas.ResponsesParametersReasoning{ + Effort: schemas.Ptr("none"), + } + } + } + } } - bifrostReq.Params.ExtraParams["additionalModelResponseFieldPaths"] = request.AdditionalModelResponseFieldPaths } // Convert performance config to extra params @@ -446,6 +449,27 @@ func ToBedrockResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) (*Be if bifrostReq.Params.TopP != nil { inferenceConfig.TopP = bifrostReq.Params.TopP } + if bifrostReq.Params.Reasoning != nil { + if bedrockReq.AdditionalModelRequestFields == nil { + bedrockReq.AdditionalModelRequestFields = make(schemas.OrderedMap) + } + if bifrostReq.Params.Reasoning.Effort != nil && 
*bifrostReq.Params.Reasoning.Effort == "none" { + bedrockReq.AdditionalModelRequestFields["reasoning_config"] = map[string]string{ + "type": "disabled", + } + } else { + if bifrostReq.Params.Reasoning.MaxTokens == nil { + return nil, fmt.Errorf("reasoning.max_tokens is required for reasoning") + } else if schemas.IsAnthropicModel(bedrockReq.ModelID) && *bifrostReq.Params.Reasoning.MaxTokens < anthropic.MinimumReasoningMaxTokens { + return nil, fmt.Errorf("reasoning.max_tokens must be greater than or equal to %d", anthropic.MinimumReasoningMaxTokens) + } else { + bedrockReq.AdditionalModelRequestFields["reasoning_config"] = map[string]any{ + "type": "enabled", + "budget_tokens": *bifrostReq.Params.Reasoning.MaxTokens, + } + } + } + } if bifrostReq.Params.ExtraParams != nil { if stop, ok := schemas.SafeExtractStringSlice(bifrostReq.Params.ExtraParams["stop"]); ok { inferenceConfig.StopSequences = stop @@ -635,6 +659,12 @@ func (response *BedrockConverseResponse) ToBifrostResponsesResponse() (*schemas. CreatedAt: int(time.Now().Unix()), } + // Convert output message to Responses format + if response.Output != nil && response.Output.Message != nil { + outputMessages := convertBedrockMessageToResponsesMessages(*response.Output.Message) + bifrostResp.Output = outputMessages + } + if response.Usage != nil { // Convert usage information bifrostResp.Usage = &schemas.ResponsesResponseUsage{ @@ -655,12 +685,6 @@ func (response *BedrockConverseResponse) ToBifrostResponsesResponse() (*schemas. 
} } - // Convert output message to Responses format - if response.Output != nil && response.Output.Message != nil { - outputMessages := convertBedrockMessageToResponsesMessages(*response.Output.Message) - bifrostResp.Output = outputMessages - } - if response.ServiceTier != nil && response.ServiceTier.Type != "" { bifrostResp.ServiceTier = &response.ServiceTier.Type } diff --git a/core/providers/bedrock/types.go b/core/providers/bedrock/types.go index 55d68ef57..faa224e1f 100644 --- a/core/providers/bedrock/types.go +++ b/core/providers/bedrock/types.go @@ -105,6 +105,9 @@ type BedrockContentBlock struct { // Guard content (for guardrails) GuardContent *BedrockGuardContent `json:"guardContent,omitempty"` + // Reasoning content + ReasoningContent *BedrockReasoningContent `json:"reasoningContent,omitempty"` + // For Tool Call Result content JSON interface{} `json:"json,omitempty"` } @@ -151,6 +154,15 @@ type BedrockGuardContent struct { Text *BedrockGuardContentText `json:"text,omitempty"` } +type BedrockReasoningContent struct { + ReasoningText *BedrockReasoningContentText `json:"reasoningText,omitempty"` +} + +type BedrockReasoningContentText struct { + Text string `json:"text"` + Signature *string `json:"signature,omitempty"` +} + // BedrockGuardContentText represents text content for guardrails type BedrockGuardContentText struct { Text string `json:"text"` // Required: Text content @@ -429,8 +441,9 @@ type BedrockToolUseStart struct { // BedrockContentBlockDelta represents the incremental content type BedrockContentBlockDelta struct { - Text *string `json:"text,omitempty"` // Text content delta - ToolUse *BedrockToolUseDelta `json:"toolUse,omitempty"` // Tool use delta + Text *string `json:"text,omitempty"` // Text content delta + ReasoningContent *BedrockReasoningContentText `json:"reasoningContent,omitempty"` // Reasoning content delta + ToolUse *BedrockToolUseDelta `json:"toolUse,omitempty"` // Tool use delta } // BedrockToolUseDelta represents incremental 
tool use content diff --git a/core/providers/bedrock/utils.go b/core/providers/bedrock/utils.go index 52f2e4725..fd7fe92f9 100644 --- a/core/providers/bedrock/utils.go +++ b/core/providers/bedrock/utils.go @@ -7,13 +7,14 @@ import ( "strings" "github.com/bytedance/sonic" + "github.com/maximhq/bifrost/core/providers/anthropic" schemas "github.com/maximhq/bifrost/core/schemas" ) // convertParameters handles parameter conversion -func convertChatParameters(ctx *context.Context, bifrostReq *schemas.BifrostChatRequest, bedrockReq *BedrockConverseRequest) { +func convertChatParameters(ctx *context.Context, bifrostReq *schemas.BifrostChatRequest, bedrockReq *BedrockConverseRequest) error { if bifrostReq.Params == nil { - return + return nil } // Convert inference config if inferenceConfig := convertInferenceConfig(bifrostReq.Params); inferenceConfig != nil { @@ -28,6 +29,28 @@ func convertChatParameters(ctx *context.Context, bifrostReq *schemas.BifrostChat bedrockReq.ToolConfig = toolConfig } + // Convert reasoning config + if bifrostReq.Params.Reasoning != nil { + if bedrockReq.AdditionalModelRequestFields == nil { + bedrockReq.AdditionalModelRequestFields = make(schemas.OrderedMap) + } + if bifrostReq.Params.Reasoning.Effort != nil && *bifrostReq.Params.Reasoning.Effort == "none" { + bedrockReq.AdditionalModelRequestFields["reasoning_config"] = map[string]string{ + "type": "disabled", + } + } else { + if bifrostReq.Params.Reasoning.MaxTokens == nil { + return fmt.Errorf("reasoning.max_tokens is required for reasoning") + } else if schemas.IsAnthropicModel(bedrockReq.ModelID) && *bifrostReq.Params.Reasoning.MaxTokens < anthropic.MinimumReasoningMaxTokens { + return fmt.Errorf("reasoning.max_tokens must be greater than or equal to %d", anthropic.MinimumReasoningMaxTokens) + } + bedrockReq.AdditionalModelRequestFields["reasoning_config"] = map[string]any{ + "type": "enabled", + "budget_tokens": *bifrostReq.Params.Reasoning.MaxTokens, + } + } + } + // If response_format was 
converted to a tool, add it to the tool config if responseFormatTool != nil { if bedrockReq.ToolConfig == nil { @@ -131,6 +154,7 @@ func convertChatParameters(ctx *context.Context, bifrostReq *schemas.BifrostChat } } } + return nil } // ensureChatToolConfigForConversation ensures toolConfig is present when tool content exists @@ -675,8 +699,14 @@ func ToBedrockError(bifrostErr *schemas.BifrostError) *BedrockError { } } + // Safely extract message from nested error + message := "" + if bifrostErr.Error != nil { + message = bifrostErr.Error.Message + } + bedrockErr := &BedrockError{ - Message: bifrostErr.Error.Message, + Message: message, } // Map error type/code diff --git a/core/providers/cohere/chat.go b/core/providers/cohere/chat.go index 97e7a6298..052a1fa1d 100644 --- a/core/providers/cohere/chat.go +++ b/core/providers/cohere/chat.go @@ -1,15 +1,16 @@ package cohere import ( + "fmt" "time" "github.com/maximhq/bifrost/core/schemas" ) // ToCohereChatCompletionRequest converts a Bifrost request to Cohere v2 format -func ToCohereChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) *CohereChatRequest { +func ToCohereChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) (*CohereChatRequest, error) { if bifrostReq == nil || bifrostReq.Input == nil { - return nil + return nil, fmt.Errorf("bifrost request is nil") } messages := bifrostReq.Input @@ -99,6 +100,23 @@ func ToCohereChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) *Cohe cohereReq.FrequencyPenalty = bifrostReq.Params.FrequencyPenalty cohereReq.PresencePenalty = bifrostReq.Params.PresencePenalty + if bifrostReq.Params.Reasoning != nil { + if bifrostReq.Params.Reasoning.Effort != nil && *bifrostReq.Params.Reasoning.Effort == "none" { + cohereReq.Thinking = &CohereThinking{ + Type: ThinkingTypeDisabled, + } + } else { + if bifrostReq.Params.Reasoning.MaxTokens == nil { + return nil, fmt.Errorf("reasoning.max_tokens is required for reasoning") + } else { + cohereReq.Thinking = 
&CohereThinking{ + Type: ThinkingTypeEnabled, + TokenBudget: bifrostReq.Params.Reasoning.MaxTokens, + } + } + } + } + // Convert response format if bifrostReq.Params.ResponseFormat != nil { cohereReq.ResponseFormat = convertResponseFormatToCohere(bifrostReq.Params.ResponseFormat) @@ -179,7 +197,7 @@ func ToCohereChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) *Cohe } } - return cohereReq + return cohereReq, nil } // ToBifrostChatRequest converts a Cohere v2 chat request to Bifrost format @@ -193,10 +211,16 @@ func (req *CohereChatRequest) ToBifrostChatRequest() *schemas.BifrostChatRequest bifrostReq := &schemas.BifrostChatRequest{ Provider: provider, Model: model, - Input: convertCohereMessagesToBifrost(req.Messages), Params: &schemas.ChatParameters{}, } - + // Convert messages + if req.Messages != nil { + bifrostMessages := make([]schemas.ChatMessage, len(req.Messages)) + for i, message := range req.Messages { + bifrostMessages[i] = *message.ToBifrostChatMessage() + } + bifrostReq.Input = bifrostMessages + } // Convert parameters if req.MaxTokens != nil { bifrostReq.Params.MaxCompletionTokens = req.MaxTokens @@ -216,6 +240,22 @@ func (req *CohereChatRequest) ToBifrostChatRequest() *schemas.BifrostChatRequest if req.PresencePenalty != nil { bifrostReq.Params.PresencePenalty = req.PresencePenalty } + + // Convert reasoning + if req.Thinking != nil { + if req.Thinking.Type == ThinkingTypeDisabled { + bifrostReq.Params.Reasoning = &schemas.ChatReasoning{ + Effort: schemas.Ptr("none"), + } + } else { + bifrostReq.Params.Reasoning = &schemas.ChatReasoning{ + Effort: schemas.Ptr("auto"), + } + if req.Thinking.TokenBudget != nil { + bifrostReq.Params.Reasoning.MaxTokens = req.Thinking.TokenBudget + } + } + } if req.ResponseFormat != nil { bifrostReq.Params.ResponseFormat = convertCohereResponseFormatToBifrost(req.ResponseFormat) } @@ -293,12 +333,8 @@ func (response *CohereChatResponse) ToBifrostChatResponse(model string) *schemas Object: "chat.completion", 
Choices: []schemas.BifrostResponseChoice{ { - Index: 0, - ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ - Message: &schemas.ChatMessage{ - Role: schemas.ChatMessageRoleAssistant, - }, - }, + Index: 0, + ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{}, }, }, Created: int(time.Now().Unix()), @@ -308,95 +344,10 @@ func (response *CohereChatResponse) ToBifrostChatResponse(model string) *schemas }, } - var content *string - var contentBlocks []schemas.ChatContentBlock - var toolCalls []schemas.ChatAssistantMessageToolCall - - // Convert message content + // Convert messages if response.Message != nil { - if response.Message.Content != nil { - if response.Message.Content.IsString() || - (response.Message.Content.IsBlocks() && - len(response.Message.Content.GetBlocks()) == 1 && - response.Message.Content.GetBlocks()[0].Type == CohereContentBlockTypeText) { - if response.Message.Content.IsString() { - content = response.Message.Content.GetString() - } else { - content = response.Message.Content.GetBlocks()[0].Text - } - } else if response.Message.Content.IsBlocks() { - for _, block := range response.Message.Content.GetBlocks() { - if block.Type == CohereContentBlockTypeText && block.Text != nil { - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeText, - Text: block.Text, - }) - } else if block.Type == CohereContentBlockTypeImage && block.ImageURL != nil { - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeImage, - ImageURLStruct: &schemas.ChatInputImage{ - URL: block.ImageURL.URL, - }, - }) - } - } - } - } - - // Create the message content - messageContent := &schemas.ChatMessageContent{ - ContentStr: content, - ContentBlocks: contentBlocks, - } - - // Convert tool calls - if response.Message.ToolCalls != nil { - for _, toolCall := range response.Message.ToolCalls { - // Check if Function is nil to avoid nil pointer dereference - 
if toolCall.Function == nil { - // Skip this tool call if Function is nil - continue - } - - // Safely extract function name and arguments - var functionName *string - var functionArguments string - - if toolCall.Function.Name != nil { - functionName = toolCall.Function.Name - } else { - // Use empty string if Name is nil - functionName = schemas.Ptr("") - } - - // Arguments is a string, not a pointer, so it's safe to access directly - functionArguments = toolCall.Function.Arguments - - bifrostToolCall := schemas.ChatAssistantMessageToolCall{ - Index: uint16(len(toolCalls)), - ID: toolCall.ID, - Function: schemas.ChatAssistantMessageToolCallFunction{ - Name: functionName, - Arguments: functionArguments, - }, - } - toolCalls = append(toolCalls, bifrostToolCall) - } - } - - // Create assistant message if we have tool calls - var assistantMessage *schemas.ChatAssistantMessage - if len(toolCalls) > 0 { - assistantMessage = &schemas.ChatAssistantMessage{ - ToolCalls: toolCalls, - } - } - - bifrostResponse.Choices[0].ChatNonStreamResponseChoice.Message = &schemas.ChatMessage{ - Role: schemas.ChatMessageRoleAssistant, - Content: messageContent, - ChatAssistantMessage: assistantMessage, - } + bifrostMessage := response.Message.ToBifrostChatMessage() + bifrostResponse.Choices[0].ChatNonStreamResponseChoice.Message = bifrostMessage } // Convert finish reason @@ -456,24 +407,48 @@ func (chunk *CohereStreamEvent) ToBifrostChatCompletionStream() (*schemas.Bifros if chunk.Delta != nil && chunk.Delta.Message != nil && chunk.Delta.Message.Content != nil && - chunk.Delta.Message.Content.CohereStreamContentObject != nil && - chunk.Delta.Message.Content.CohereStreamContentObject.Text != nil { - // Try to cast content to CohereStreamContent - streamResponse := &schemas.BifrostChatResponse{ - Object: "chat.completion.chunk", - Choices: []schemas.BifrostResponseChoice{ - { - Index: 0, - ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ - Delta: 
&schemas.ChatStreamResponseChoiceDelta{ - Content: chunk.Delta.Message.Content.CohereStreamContentObject.Text, + chunk.Delta.Message.Content.CohereStreamContentObject != nil { + if chunk.Delta.Message.Content.CohereStreamContentObject.Text != nil { + // Try to cast content to CohereStreamContent + streamResponse := &schemas.BifrostChatResponse{ + Object: "chat.completion.chunk", + Choices: []schemas.BifrostResponseChoice{ + { + Index: 0, + ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ + Delta: &schemas.ChatStreamResponseChoiceDelta{ + Content: chunk.Delta.Message.Content.CohereStreamContentObject.Text, + }, }, }, }, - }, - } + } - return streamResponse, nil, false + return streamResponse, nil, false + } else if chunk.Delta.Message.Content.CohereStreamContentObject.Thinking != nil { + streamResponse := &schemas.BifrostChatResponse{ + Object: "chat.completion.chunk", + Choices: []schemas.BifrostResponseChoice{ + { + Index: 0, + ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ + Delta: &schemas.ChatStreamResponseChoiceDelta{ + Reasoning: chunk.Delta.Message.Content.CohereStreamContentObject.Thinking, + ReasoningDetails: []schemas.ChatReasoningDetails{ + { + Index: 0, + Type: schemas.BifrostReasoningDetailsTypeText, + Text: chunk.Delta.Message.Content.CohereStreamContentObject.Thinking, + }, + }, + }, + }, + }, + }, + } + + return streamResponse, nil, false + } } case StreamEventToolPlanDelta: @@ -485,7 +460,7 @@ func (chunk *CohereStreamEvent) ToBifrostChatCompletionStream() (*schemas.Bifros Index: 0, ChatStreamResponseChoice: &schemas.ChatStreamResponseChoice{ Delta: &schemas.ChatStreamResponseChoiceDelta{ - Thought: chunk.Delta.Message.ToolPlan, + Reasoning: chunk.Delta.Message.ToolPlan, }, }, }, @@ -583,84 +558,123 @@ func (chunk *CohereStreamEvent) ToBifrostChatCompletionStream() (*schemas.Bifros return nil, nil, false } -// convertCohereMessagesToBifrost converts Cohere messages to Bifrost format -func 
convertCohereMessagesToBifrost(messages []CohereMessage) []schemas.ChatMessage { - if messages == nil { +func (cm *CohereMessage) ToBifrostChatMessage() *schemas.ChatMessage { + if cm == nil { return nil } - bifrostMessages := make([]schemas.ChatMessage, len(messages)) - for i, msg := range messages { - bifrostMsg := schemas.ChatMessage{ - Role: schemas.ChatMessageRole(msg.Role), - } + var content *string + var contentBlocks []schemas.ChatContentBlock + var toolCalls []schemas.ChatAssistantMessageToolCall + var reasoningDetails []schemas.ChatReasoningDetails + var reasoningText string - // Convert content - if msg.Content != nil { - if msg.Content.IsString() { - bifrostMsg.Content = &schemas.ChatMessageContent{ - ContentStr: msg.Content.GetString(), - } - } else if msg.Content.IsBlocks() { - var contentBlocks []schemas.ChatContentBlock - for _, block := range msg.Content.GetBlocks() { - switch block.Type { - case CohereContentBlockTypeText: - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeText, - Text: block.Text, - }) - case CohereContentBlockTypeImage: - if block.ImageURL != nil { - contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ - Type: schemas.ChatContentBlockTypeImage, - ImageURLStruct: &schemas.ChatInputImage{ - URL: block.ImageURL.URL, - }, - }) - } - } - } - if len(contentBlocks) > 0 { - bifrostMsg.Content = &schemas.ChatMessageContent{ - ContentBlocks: contentBlocks, + // Convert message content + if cm.Content != nil { + if cm.Content.IsString() || + (cm.Content.IsBlocks() && + len(cm.Content.GetBlocks()) == 1 && + cm.Content.GetBlocks()[0].Type == CohereContentBlockTypeText) { + if cm.Content.IsString() { + content = cm.Content.GetString() + } else { + content = cm.Content.GetBlocks()[0].Text + } + } else if cm.Content.IsBlocks() { + for _, block := range cm.Content.GetBlocks() { + if block.Type == CohereContentBlockTypeText && block.Text != nil { + contentBlocks = append(contentBlocks, 
schemas.ChatContentBlock{ + Type: schemas.ChatContentBlockTypeText, + Text: block.Text, + }) + } else if block.Type == CohereContentBlockTypeImage && block.ImageURL != nil { + contentBlocks = append(contentBlocks, schemas.ChatContentBlock{ + Type: schemas.ChatContentBlockTypeImage, + ImageURLStruct: &schemas.ChatInputImage{ + URL: block.ImageURL.URL, + }, + }) + } else if block.Type == CohereContentBlockTypeThinking && block.Thinking != nil { + reasoningDetails = append(reasoningDetails, schemas.ChatReasoningDetails{ + Index: len(reasoningDetails), + Type: schemas.BifrostReasoningDetailsTypeText, + Text: block.Thinking, + }) + if len(reasoningText) > 0 { + reasoningText += "\n" } + reasoningText += *block.Thinking } } } + } - // Convert tool calls (for assistant messages) - if msg.ToolCalls != nil { - var toolCalls []schemas.ChatAssistantMessageToolCall - for j, tc := range msg.ToolCalls { - toolCall := schemas.ChatAssistantMessageToolCall{ - Index: uint16(j), - ID: tc.ID, - } - if tc.Function != nil { - toolCall.Function = schemas.ChatAssistantMessageToolCallFunction{ - Name: tc.Function.Name, - Arguments: tc.Function.Arguments, - } - } - toolCalls = append(toolCalls, toolCall) + if len(contentBlocks) == 1 && contentBlocks[0].Type == schemas.ChatContentBlockTypeText { + content = contentBlocks[0].Text + contentBlocks = nil + } + + // Create the message content + messageContent := &schemas.ChatMessageContent{ + ContentStr: content, + ContentBlocks: contentBlocks, + } + + // Convert tool calls + if cm.ToolCalls != nil { + for _, toolCall := range cm.ToolCalls { + // Check if Function is nil to avoid nil pointer dereference + if toolCall.Function == nil { + // Skip this tool call if Function is nil + continue } - if len(toolCalls) > 0 { - bifrostMsg.ChatAssistantMessage = &schemas.ChatAssistantMessage{ - ToolCalls: toolCalls, - } + + // Safely extract function name and arguments + var functionName *string + var functionArguments string + + if toolCall.Function.Name 
!= nil { + functionName = toolCall.Function.Name + } else { + // Use empty string if Name is nil + functionName = schemas.Ptr("") } - } - // Convert tool call ID (for tool messages) - if msg.ToolCallID != nil { - bifrostMsg.ChatToolMessage = &schemas.ChatToolMessage{ - ToolCallID: msg.ToolCallID, + // Arguments is a string, not a pointer, so it's safe to access directly + functionArguments = toolCall.Function.Arguments + + bifrostToolCall := schemas.ChatAssistantMessageToolCall{ + Index: uint16(len(toolCalls)), + ID: toolCall.ID, + Function: schemas.ChatAssistantMessageToolCallFunction{ + Name: functionName, + Arguments: functionArguments, + }, } + toolCalls = append(toolCalls, bifrostToolCall) } + } - bifrostMessages[i] = bifrostMsg + // Create assistant message if we have tool calls + var assistantMessage *schemas.ChatAssistantMessage + if len(toolCalls) > 0 { + assistantMessage = &schemas.ChatAssistantMessage{ + ToolCalls: toolCalls, + } } - return bifrostMessages + if len(reasoningDetails) > 0 { + if assistantMessage == nil { + assistantMessage = &schemas.ChatAssistantMessage{} + } + assistantMessage.ReasoningDetails = reasoningDetails + assistantMessage.Reasoning = schemas.Ptr(reasoningText) + } + + bifrostMessage := &schemas.ChatMessage{ + Role: schemas.ChatMessageRole(cm.Role), + Content: messageContent, + ChatAssistantMessage: assistantMessage, + } + return bifrostMessage } diff --git a/core/providers/cohere/cohere.go b/core/providers/cohere/cohere.go index 7149005a2..b2fe7ec6d 100644 --- a/core/providers/cohere/cohere.go +++ b/core/providers/cohere/cohere.go @@ -279,7 +279,7 @@ func (provider *CohereProvider) ChatCompletion(ctx context.Context, key schemas. 
jsonBody, err := providerUtils.CheckContextAndGetRequestBody( ctx, request, - func() (any, error) { return ToCohereChatCompletionRequest(request), nil }, + func() (any, error) { return ToCohereChatCompletionRequest(request) }, provider.GetProviderKey()) if err != nil { return nil, err @@ -329,10 +329,11 @@ func (provider *CohereProvider) ChatCompletionStream(ctx context.Context, postHo ctx, request, func() (any, error) { - reqBody := ToCohereChatCompletionRequest(request) - if reqBody != nil { - reqBody.Stream = schemas.Ptr(true) + reqBody, err := ToCohereChatCompletionRequest(request) + if err != nil { + return nil, err } + reqBody.Stream = schemas.Ptr(true) return reqBody, nil }, provider.GetProviderKey()) diff --git a/core/providers/cohere/types.go b/core/providers/cohere/types.go index 89ae243e8..bb20a23d2 100644 --- a/core/providers/cohere/types.go +++ b/core/providers/cohere/types.go @@ -533,8 +533,9 @@ type CohereStreamMessage struct { // CohereStreamContent represents content in streaming events type CohereStreamContent struct { - Type CohereContentBlockType `json:"type,omitempty"` // For content-start - Text *string `json:"text,omitempty"` // For content deltas + Type CohereContentBlockType `json:"type,omitempty"` // For content-start + Text *string `json:"text,omitempty"` // For content deltas + Thinking *string `json:"thinking,omitempty"` // For thinking deltas } // ==================== ERROR TYPES ==================== diff --git a/core/providers/openai/chat.go b/core/providers/openai/chat.go index 02f7672a1..abfd07188 100644 --- a/core/providers/openai/chat.go +++ b/core/providers/openai/chat.go @@ -11,7 +11,7 @@ func (request *OpenAIChatRequest) ToBifrostChatRequest() *schemas.BifrostChatReq return &schemas.BifrostChatRequest{ Provider: provider, Model: model, - Input: request.Messages, + Input: ConvertOpenAIMessagesToBifrostMessages(request.Messages), Params: &request.ChatParameters, Fallbacks: schemas.ParseFallbacks(request.Fallbacks), } @@ -25,7 
+25,7 @@ func ToOpenAIChatRequest(bifrostReq *schemas.BifrostChatRequest) *OpenAIChatRequ openaiReq := &OpenAIChatRequest{ Model: bifrostReq.Model, - Messages: bifrostReq.Input, + Messages: ConvertBifrostMessagesToOpenAIMessages(bifrostReq.Input), } if bifrostReq.Params != nil { @@ -60,8 +60,8 @@ func ToOpenAIChatRequest(bifrostReq *schemas.BifrostChatRequest) *OpenAIChatRequ // Filter OpenAI Specific Parameters func (request *OpenAIChatRequest) filterOpenAISpecificParameters() { - if request.ChatParameters.ReasoningEffort != nil && *request.ChatParameters.ReasoningEffort == "minimal" { - request.ChatParameters.ReasoningEffort = schemas.Ptr("low") + if request.ChatParameters.Reasoning != nil && request.ChatParameters.Reasoning.Effort != nil && *request.ChatParameters.Reasoning.Effort == "minimal" { + request.ChatParameters.Reasoning.Effort = schemas.Ptr("low") } if request.ChatParameters.PromptCacheKey != nil { request.ChatParameters.PromptCacheKey = nil diff --git a/core/providers/openai/openai.go b/core/providers/openai/openai.go index 670d5ca76..c2edb4b68 100644 --- a/core/providers/openai/openai.go +++ b/core/providers/openai/openai.go @@ -512,6 +512,12 @@ func HandleOpenAITextCompletionStreaming( if calculatedTotal > usage.TotalTokens { usage.TotalTokens = calculatedTotal } + if response.Usage.CompletionTokensDetails != nil { + usage.CompletionTokensDetails = response.Usage.CompletionTokensDetails + } + if response.Usage.PromptTokensDetails != nil { + usage.PromptTokensDetails = response.Usage.PromptTokensDetails + } response.Usage = nil } @@ -972,6 +978,12 @@ func HandleOpenAIChatCompletionStreaming( if calculatedTotal > usage.TotalTokens { usage.TotalTokens = calculatedTotal } + if response.Usage.PromptTokensDetails != nil { + usage.PromptTokensDetails = response.Usage.PromptTokensDetails + } + if response.Usage.CompletionTokensDetails != nil { + usage.CompletionTokensDetails = response.Usage.CompletionTokensDetails + } response.Usage = nil } diff --git 
a/core/providers/openai/types.go b/core/providers/openai/types.go index ccb1db46d..352006e82 100644 --- a/core/providers/openai/types.go +++ b/core/providers/openai/types.go @@ -39,8 +39,8 @@ type OpenAIEmbeddingRequest struct { // OpenAIChatRequest represents an OpenAI chat completion request type OpenAIChatRequest struct { - Model string `json:"model"` - Messages []schemas.ChatMessage `json:"messages"` + Model string `json:"model"` + Messages []OpenAIMessage `json:"messages"` schemas.ChatParameters Stream *bool `json:"stream,omitempty"` @@ -53,6 +53,56 @@ type OpenAIChatRequest struct { Fallbacks []string `json:"fallbacks,omitempty"` } +type OpenAIMessage struct { + Name *string `json:"name,omitempty"` // for chat completions + Role schemas.ChatMessageRole `json:"role,omitempty"` + Content *schemas.ChatMessageContent `json:"content,omitempty"` + + // Embedded pointer structs - when non-nil, their exported fields are flattened into the top-level JSON object + // IMPORTANT: Only one of the following can be non-nil at a time, otherwise the JSON marshalling will override the common fields + *schemas.ChatToolMessage + *OpenAIChatAssistantMessage +} + +type OpenAIChatAssistantMessage struct { + Refusal *string `json:"refusal,omitempty"` + Reasoning *string `json:"reasoning,omitempty"` + Annotations []schemas.ChatAssistantMessageAnnotation `json:"annotations,omitempty"` + ToolCalls []schemas.ChatAssistantMessageToolCall `json:"tool_calls,omitempty"` +} + +// MarshalJSON implements custom JSON marshalling for OpenAIChatRequest. +// It excludes the reasoning field and instead marshals reasoning_effort +// with the value of Reasoning.Effort if not nil. 
+func (r *OpenAIChatRequest) MarshalJSON() ([]byte, error) { + if r == nil { + return []byte("null"), nil + } + type Alias OpenAIChatRequest + + // Aux struct: + // - Alias embeds all original fields + // - Reasoning shadows the embedded ChatParameters.Reasoning + // so that "reasoning" is not emitted + // - ReasoningEffort is emitted as "reasoning_effort" + aux := struct { + *Alias + // Shadow the embedded "reasoning" field and omit it + Reasoning *schemas.ChatReasoning `json:"reasoning,omitempty"` + ReasoningEffort *string `json:"reasoning_effort,omitempty"` + }{ + Alias: (*Alias)(r), + } + + // DO NOT set aux.Reasoning → it stays nil and is omitted via omitempty, and also due to double reference to the same json field. + + if r.Reasoning != nil && r.Reasoning.Effort != nil { + aux.ReasoningEffort = r.Reasoning.Effort + } + + return sonic.Marshal(aux) +} + // IsStreamingRequested implements the StreamingRequest interface func (r *OpenAIChatRequest) IsStreamingRequested() bool { return r.Stream != nil && *r.Stream diff --git a/core/providers/openai/utils.go b/core/providers/openai/utils.go new file mode 100644 index 000000000..71906f014 --- /dev/null +++ b/core/providers/openai/utils.go @@ -0,0 +1,45 @@ +package openai + +import "github.com/maximhq/bifrost/core/schemas" + +func ConvertOpenAIMessagesToBifrostMessages(messages []OpenAIMessage) []schemas.ChatMessage { + bifrostMessages := make([]schemas.ChatMessage, len(messages)) + for i, message := range messages { + bifrostMessages[i] = schemas.ChatMessage{ + Name: message.Name, + Role: message.Role, + Content: message.Content, + ChatToolMessage: message.ChatToolMessage, + } + if message.OpenAIChatAssistantMessage != nil { + bifrostMessages[i].ChatAssistantMessage = &schemas.ChatAssistantMessage{ + Refusal: message.OpenAIChatAssistantMessage.Refusal, + Reasoning: message.OpenAIChatAssistantMessage.Reasoning, + Annotations: message.OpenAIChatAssistantMessage.Annotations, + ToolCalls: 
message.OpenAIChatAssistantMessage.ToolCalls, + } + } + } + return bifrostMessages +} + +func ConvertBifrostMessagesToOpenAIMessages(messages []schemas.ChatMessage) []OpenAIMessage { + openaiMessages := make([]OpenAIMessage, len(messages)) + for i, message := range messages { + openaiMessages[i] = OpenAIMessage{ + Name: message.Name, + Role: message.Role, + Content: message.Content, + ChatToolMessage: message.ChatToolMessage, + } + if message.ChatAssistantMessage != nil { + openaiMessages[i].OpenAIChatAssistantMessage = &OpenAIChatAssistantMessage{ + Refusal: message.ChatAssistantMessage.Refusal, + Reasoning: message.ChatAssistantMessage.Reasoning, + Annotations: message.ChatAssistantMessage.Annotations, + ToolCalls: message.ChatAssistantMessage.ToolCalls, + } + } + } + return openaiMessages +} diff --git a/core/providers/perplexity/chat.go b/core/providers/perplexity/chat.go index 30de31eb6..ef7a0df1f 100644 --- a/core/providers/perplexity/chat.go +++ b/core/providers/perplexity/chat.go @@ -27,11 +27,11 @@ func ToPerplexityChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) * perplexityReq.ResponseFormat = bifrostReq.Params.ResponseFormat // Handle reasoning effort mapping - if bifrostReq.Params.ReasoningEffort != nil { - if *bifrostReq.Params.ReasoningEffort == "minimal" { + if bifrostReq.Params.Reasoning != nil && bifrostReq.Params.Reasoning.Effort != nil { + if *bifrostReq.Params.Reasoning.Effort == "minimal" { perplexityReq.ReasoningEffort = schemas.Ptr("low") } else { - perplexityReq.ReasoningEffort = bifrostReq.Params.ReasoningEffort + perplexityReq.ReasoningEffort = bifrostReq.Params.Reasoning.Effort } } diff --git a/core/providers/vertex/vertex.go b/core/providers/vertex/vertex.go index be7d6e407..b95d49f86 100644 --- a/core/providers/vertex/vertex.go +++ b/core/providers/vertex/vertex.go @@ -280,7 +280,10 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas. 
if schemas.IsAnthropicModel(deployment) { // Use centralized Anthropic converter - reqBody := anthropic.ToAnthropicChatRequest(request) + reqBody, err := anthropic.ToAnthropicChatRequest(request) + if err != nil { + return nil, err + } if reqBody == nil { return nil, fmt.Errorf("chat completion input is not provided") } @@ -512,13 +515,14 @@ func (provider *VertexProvider) ChatCompletionStream(ctx context.Context, postHo ctx, request, func() (any, error) { - reqBody := anthropic.ToAnthropicChatRequest(request) - if reqBody == nil { - return nil, fmt.Errorf("chat completion input is not provided") + reqBody, err := anthropic.ToAnthropicChatRequest(request) + if err != nil { + return nil, err + } + if reqBody != nil { + reqBody.Model = deployment + reqBody.Stream = schemas.Ptr(true) } - - reqBody.Model = deployment - reqBody.Stream = schemas.Ptr(true) // Convert struct to map for Vertex API reqBytes, err := sonic.Marshal(reqBody) @@ -682,13 +686,13 @@ func (provider *VertexProvider) Responses(ctx context.Context, key schemas.Key, var requestBody map[string]interface{} // Use centralized Anthropic converter - reqBody := anthropic.ToAnthropicResponsesRequest(request) - if reqBody == nil { - return nil, fmt.Errorf("responses input is not provided") + reqBody, err := anthropic.ToAnthropicResponsesRequest(request) + if err != nil { + return nil, err + } + if reqBody != nil { + reqBody.Model = deployment } - - reqBody.Model = deployment - // Convert struct to map for Vertex API reqBytes, err := sonic.Marshal(reqBody) if err != nil { @@ -840,14 +844,14 @@ func (provider *VertexProvider) ResponsesStream(ctx context.Context, postHookRun ctx, request, func() (any, error) { - reqBody := anthropic.ToAnthropicResponsesRequest(request) - if reqBody == nil { - return nil, fmt.Errorf("responses input is not provided") + reqBody, err := anthropic.ToAnthropicResponsesRequest(request) + if err != nil { + return nil, err + } + if reqBody != nil { + reqBody.Model = deployment + 
reqBody.Stream = schemas.Ptr(true) } - - reqBody.Model = deployment - reqBody.Stream = schemas.Ptr(true) - // Convert struct to map for Vertex API reqBytes, err := sonic.Marshal(reqBody) if err != nil { diff --git a/core/schemas/chatcompletions.go b/core/schemas/chatcompletions.go index c103ad04d..15a774896 100644 --- a/core/schemas/chatcompletions.go +++ b/core/schemas/chatcompletions.go @@ -162,7 +162,7 @@ type ChatParameters struct { ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Penalizes repeated tokens PromptCacheKey *string `json:"prompt_cache_key,omitempty"` // Prompt cache key - ReasoningEffort *string `json:"reasoning_effort,omitempty"` // "minimal" | "low" | "medium" | "high" + Reasoning *ChatReasoning `json:"reasoning,omitempty"` // Reasoning parameters ResponseFormat *interface{} `json:"response_format,omitempty"` // Format for the response SafetyIdentifier *string `json:"safety_identifier,omitempty"` // Safety identifier Seed *int `json:"seed,omitempty"` @@ -183,6 +183,48 @@ type ChatParameters struct { ExtraParams map[string]interface{} `json:"-"` } +// UnmarshalJSON implements custom JSON unmarshalling for ChatParameters. 
+func (cp *ChatParameters) UnmarshalJSON(data []byte) error { + // Alias to avoid recursion + type Alias ChatParameters + + // Aux struct adds reasoning_effort for decoding + var aux struct { + *Alias + ReasoningEffort *string `json:"reasoning_effort"` // only for input + } + + aux.Alias = (*Alias)(cp) + + // Single unmarshal + if err := sonic.Unmarshal(data, &aux); err != nil { + return err + } + + // Now aux.Reasoning (from Alias) and aux.ReasoningEffort are filled + + // If both are non-nil, they were both set in JSON + if aux.Alias != nil && aux.Alias.Reasoning != nil && aux.ReasoningEffort != nil { + return fmt.Errorf("both reasoning_effort and reasoning fields cannot be present at the same time") + } + + // If reasoning_effort is set, convert it into Reasoning + if aux.ReasoningEffort != nil { + cp.Reasoning = &ChatReasoning{ + Effort: aux.ReasoningEffort, + } + } + + // ExtraParams etc. are already handled by the alias + return nil +} + +// Not in OpenAI's spec, but needed to support extra parameters for reasoning. +type ChatReasoning struct { + Effort *string `json:"effort,omitempty"` // "none" | "minimal" | "low" | "medium" | "high" (any value other than "none" will enable reasoning) + MaxTokens *int `json:"max_tokens,omitempty"` // Maximum number of tokens to generate for the reasoning output (required for anthropic) +} + // ChatStreamOptions represents the stream options for a chat completion. type ChatStreamOptions struct { IncludeObfuscation *bool `json:"include_obfuscation,omitempty"` @@ -435,6 +477,49 @@ type ChatMessage struct { *ChatAssistantMessage } +// UnmarshalJSON implements custom JSON unmarshalling for ChatMessage. +// This is needed because ChatAssistantMessage has a custom UnmarshalJSON method, +// which interferes with sonic's handling of other fields in ChatMessage. 
+func (cm *ChatMessage) UnmarshalJSON(data []byte) error { + // Unmarshal the base fields directly + type baseFields struct { + Name *string `json:"name,omitempty"` + Role ChatMessageRole `json:"role,omitempty"` + Content *ChatMessageContent `json:"content,omitempty"` + } + var base baseFields + if err := sonic.Unmarshal(data, &base); err != nil { + return err + } + cm.Name = base.Name + cm.Role = base.Role + cm.Content = base.Content + + // Unmarshal ChatToolMessage fields + type toolMsgAlias ChatToolMessage + var toolMsg toolMsgAlias + if err := sonic.Unmarshal(data, &toolMsg); err != nil { + return err + } + if toolMsg.ToolCallID != nil { + cm.ChatToolMessage = (*ChatToolMessage)(&toolMsg) + } + + // Unmarshal ChatAssistantMessage (which has its own custom unmarshaller) + var assistantMsg ChatAssistantMessage + if err := sonic.Unmarshal(data, &assistantMsg); err != nil { + return err + } + // Only set if any field is populated + if assistantMsg.Refusal != nil || assistantMsg.Reasoning != nil || + len(assistantMsg.ReasoningDetails) > 0 || len(assistantMsg.Annotations) > 0 || + len(assistantMsg.ToolCalls) > 0 { + cm.ChatAssistantMessage = &assistantMsg + } + + return nil +} + // ChatMessageContent represents a content in a message. type ChatMessageContent struct { ContentStr *string @@ -539,9 +624,46 @@ type ChatToolMessage struct { // ChatAssistantMessage represents a message in a chat conversation. 
type ChatAssistantMessage struct { - Refusal *string `json:"refusal,omitempty"` - Annotations []ChatAssistantMessageAnnotation `json:"annotations,omitempty"` - ToolCalls []ChatAssistantMessageToolCall `json:"tool_calls,omitempty"` + Refusal *string `json:"refusal,omitempty"` + Reasoning *string `json:"reasoning,omitempty"` + ReasoningDetails []ChatReasoningDetails `json:"reasoning_details,omitempty"` + Annotations []ChatAssistantMessageAnnotation `json:"annotations,omitempty"` + ToolCalls []ChatAssistantMessageToolCall `json:"tool_calls,omitempty"` +} + +// UnmarshalJSON implements custom unmarshalling for ChatAssistantMessage. +// If Reasoning is non-nil and ReasoningDetails is nil/empty, it adds a single +// ChatReasoningDetails entry of type "reasoning.text" with the text set to Reasoning. +func (cm *ChatAssistantMessage) UnmarshalJSON(data []byte) error { + if cm == nil { + return nil + } + + // Alias to avoid infinite recursion + type Alias ChatAssistantMessage + + var aux Alias + if err := sonic.Unmarshal(data, &aux); err != nil { + return err + } + + // Copy decoded data back into the original type + *cm = ChatAssistantMessage(aux) + + // If Reasoning is present and there are no reasoning_details, + // synthesize a text reasoning_details entry. + if cm.Reasoning != nil && len(cm.ReasoningDetails) == 0 { + text := *cm.Reasoning + cm.ReasoningDetails = []ChatReasoningDetails{ + { + Index: 0, + Type: BifrostReasoningDetailsTypeText, + Text: &text, + }, + } + } + + return nil } // ChatAssistantMessageAnnotation represents an annotation in a response. 
@@ -589,6 +711,24 @@ type BifrostResponseChoice struct { *ChatStreamResponseChoice } +type BifrostReasoningDetailsType string + +const ( + BifrostReasoningDetailsTypeSummary BifrostReasoningDetailsType = "reasoning.summary" + BifrostReasoningDetailsTypeEncrypted BifrostReasoningDetailsType = "reasoning.encrypted" + BifrostReasoningDetailsTypeText BifrostReasoningDetailsType = "reasoning.text" +) + +// Not in OpenAI's spec, but needed to support inter provider reasoning capabilities. +type ChatReasoningDetails struct { + Index int `json:"index"` + Type BifrostReasoningDetailsType `json:"type"` + Summary *string `json:"summary,omitempty"` + Text *string `json:"text,omitempty"` + Signature *string `json:"signature,omitempty"` + Data *string `json:"data,omitempty"` // for encrypted data +} + // BifrostLogProbs represents the log probabilities for different aspects of a response. type BifrostLogProbs struct { Content []ContentLogProb `json:"content,omitempty"` @@ -614,11 +754,43 @@ type ChatStreamResponseChoice struct { // ChatStreamResponseChoiceDelta represents a delta in the stream response type ChatStreamResponseChoiceDelta struct { - Role *string `json:"role,omitempty"` // Only in the first chunk - Content *string `json:"content,omitempty"` // May be empty string or null - Thought *string `json:"thought,omitempty"` // May be empty string or null - Refusal *string `json:"refusal,omitempty"` // Refusal content if any - ToolCalls []ChatAssistantMessageToolCall `json:"tool_calls,omitempty"` // If tool calls used (supports incremental updates) + Role *string `json:"role,omitempty"` // Only in the first chunk + Content *string `json:"content,omitempty"` // May be empty string or null + Refusal *string `json:"refusal,omitempty"` // Refusal content if any + Reasoning *string `json:"reasoning,omitempty"` // May be empty string or null + ReasoningDetails []ChatReasoningDetails `json:"reasoning_details,omitempty"` + ToolCalls []ChatAssistantMessageToolCall 
`json:"tool_calls,omitempty"` // If tool calls used (supports incremental updates) +} + +// UnmarshalJSON implements custom unmarshalling for ChatStreamResponseChoiceDelta. +// If Reasoning is non-nil and ReasoningDetails is nil/empty, it adds a single +// ChatReasoningDetails entry of type "reasoning.text" with the text set to Reasoning. +func (d *ChatStreamResponseChoiceDelta) UnmarshalJSON(data []byte) error { + // Alias to avoid infinite recursion + type Alias ChatStreamResponseChoiceDelta + + var aux Alias + if err := sonic.Unmarshal(data, &aux); err != nil { + return err + } + + // Copy decoded data back into the original type + *d = ChatStreamResponseChoiceDelta(aux) + + // If Reasoning is present and there are no reasoning_details, + // synthesize a text reasoning_details entry. + if d.Reasoning != nil && len(d.ReasoningDetails) == 0 { + text := *d.Reasoning + d.ReasoningDetails = []ChatReasoningDetails{ + { + Index: 0, + Type: BifrostReasoningDetailsTypeText, + Text: &text, + }, + } + } + + return nil } // LogProb represents the log probability of a token. @@ -660,6 +832,7 @@ type ChatCompletionTokensDetails struct { CitationTokens *int `json:"citation_tokens,omitempty"` NumSearchQueries *int `json:"num_search_queries,omitempty"` ReasoningTokens int `json:"reasoning_tokens,omitempty"` + ImageTokens *int `json:"image_tokens,omitempty"` RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"` // This means the number of input tokens used to create the cache entry. 
(cache creation tokens) diff --git a/core/schemas/mux.go b/core/schemas/mux.go index 4d24d94e6..14423dc0c 100644 --- a/core/schemas/mux.go +++ b/core/schemas/mux.go @@ -770,9 +770,10 @@ func (bcr *BifrostChatRequest) ToResponsesRequest() *BifrostResponsesRequest { } // Handle Reasoning from reasoning_effort - if bcr.Params.ReasoningEffort != nil { + if bcr.Params.Reasoning != nil && (bcr.Params.Reasoning.Effort != nil || bcr.Params.Reasoning.MaxTokens != nil) { brr.Params.Reasoning = &ResponsesParametersReasoning{ - Effort: bcr.Params.ReasoningEffort, + Effort: bcr.Params.Reasoning.Effort, + MaxTokens: bcr.Params.Reasoning.MaxTokens, } } @@ -848,9 +849,12 @@ func (brr *BifrostResponsesRequest) ToChatRequest() *BifrostChatRequest { bcr.Params.ToolChoice = chatToolChoice } - // Handle ReasoningEffort from Reasoning - if brr.Params.Reasoning != nil && brr.Params.Reasoning.Effort != nil { - bcr.Params.ReasoningEffort = brr.Params.Reasoning.Effort + // Handle Reasoning from Reasoning + if brr.Params.Reasoning != nil { + bcr.Params.Reasoning = &ChatReasoning{ + Effort: brr.Params.Reasoning.Effort, + MaxTokens: brr.Params.Reasoning.MaxTokens, + } } // Handle Verbosity from Text config @@ -1354,13 +1358,13 @@ func (cr *BifrostChatResponse) ToBifrostResponsesStreamResponse(state *ChatToRes } } - if delta.Thought != nil && *delta.Thought != "" { + if delta.Reasoning != nil && *delta.Reasoning != "" { // Reasoning/thought content delta (for models that support reasoning) response := &BifrostResponsesStreamResponse{ Type: ResponsesStreamResponseTypeReasoningSummaryTextDelta, SequenceNumber: state.SequenceNumber, OutputIndex: Ptr(0), - Delta: delta.Thought, + Delta: delta.Reasoning, ExtraFields: cr.ExtraFields, } responses = append(responses, response) diff --git a/core/schemas/responses.go b/core/schemas/responses.go index d1c54bd39..d9e7cef6c 100644 --- a/core/schemas/responses.go +++ b/core/schemas/responses.go @@ -231,9 +231,10 @@ type ResponsesPrompt struct { } type 
ResponsesParametersReasoning struct { - Effort *string `json:"effort,omitempty"` // "minimal" | "low" | "medium" | "high" + Effort *string `json:"effort,omitempty"` // "none" | "minimal" | "low" | "medium" | "high" (any value other than "none" will enable reasoning) GenerateSummary *string `json:"generate_summary,omitempty"` // Deprecated: use summary instead Summary *string `json:"summary,omitempty"` // "auto" | "concise" | "detailed" + MaxTokens *int `json:"max_tokens,omitempty"` // Maximum number of tokens to generate for the reasoning output (required for anthropic) } type ResponsesResponseConversationStruct struct { @@ -320,6 +321,7 @@ type ResponsesMessage struct { *ResponsesToolMessage // For Tool calls and outputs // Reasoning + // gpt-oss models include only reasoning_text content blocks in a message, while other openai models include summaries+encrypted_content *ResponsesReasoning } diff --git a/framework/streaming/chat.go b/framework/streaming/chat.go index b4bd86c64..eb952997a 100644 --- a/framework/streaming/chat.go +++ b/framework/streaming/chat.go @@ -36,11 +36,73 @@ func (a *Accumulator) buildCompleteMessageFromChatStreamChunks(chunks []*ChatStr completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{} } if completeMessage.ChatAssistantMessage.Refusal == nil { - completeMessage.ChatAssistantMessage.Refusal = chunk.Delta.Refusal + completeMessage.ChatAssistantMessage.Refusal = bifrost.Ptr(*chunk.Delta.Refusal) } else { *completeMessage.ChatAssistantMessage.Refusal += *chunk.Delta.Refusal } } + // Handle reasoning + if chunk.Delta.Reasoning != nil && *chunk.Delta.Reasoning != "" { + if completeMessage.ChatAssistantMessage == nil { + completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{} + } + if completeMessage.ChatAssistantMessage.Reasoning == nil { + completeMessage.ChatAssistantMessage.Reasoning = bifrost.Ptr(*chunk.Delta.Reasoning) + } else { + *completeMessage.ChatAssistantMessage.Reasoning += *chunk.Delta.Reasoning + 
} + } + // Handle reasoning details + if len(chunk.Delta.ReasoningDetails) > 0 { + if completeMessage.ChatAssistantMessage == nil { + completeMessage.ChatAssistantMessage = &schemas.ChatAssistantMessage{} + } + // Check if the reasoning detail already exists on that index, if so, update it else add it to the list + for _, reasoningDetail := range chunk.Delta.ReasoningDetails { + found := false + for i := range completeMessage.ChatAssistantMessage.ReasoningDetails { + existingReasoningDetail := &completeMessage.ChatAssistantMessage.ReasoningDetails[i] + if existingReasoningDetail.Index == reasoningDetail.Index { + // Update text - accumulate if both exist + if reasoningDetail.Text != nil { + if existingReasoningDetail.Text == nil { + existingReasoningDetail.Text = reasoningDetail.Text + } else { + *existingReasoningDetail.Text += *reasoningDetail.Text + } + } + // Update signature - overwrite (signatures are typically final) + if reasoningDetail.Signature != nil { + existingReasoningDetail.Signature = reasoningDetail.Signature + } + // Update other fields if present + if reasoningDetail.Summary != nil { + if existingReasoningDetail.Summary == nil { + existingReasoningDetail.Summary = reasoningDetail.Summary + } else { + *existingReasoningDetail.Summary += *reasoningDetail.Summary + } + } + if reasoningDetail.Data != nil { + if existingReasoningDetail.Data == nil { + existingReasoningDetail.Data = reasoningDetail.Data + } else { + *existingReasoningDetail.Data += *reasoningDetail.Data + } + } + if reasoningDetail.Type != "" { + existingReasoningDetail.Type = reasoningDetail.Type + } + found = true + break + } + } + // If not found, add it to the list + if !found { + completeMessage.ChatAssistantMessage.ReasoningDetails = append(completeMessage.ChatAssistantMessage.ReasoningDetails, reasoningDetail) + } + } + } // Accumulate tool calls if len(chunk.Delta.ToolCalls) > 0 { a.accumulateToolCallsInMessage(completeMessage, chunk.Delta.ToolCalls) diff --git 
a/plugins/jsonparser/utils.go b/plugins/jsonparser/utils.go index d968eae9a..9acf1112f 100644 --- a/plugins/jsonparser/utils.go +++ b/plugins/jsonparser/utils.go @@ -308,7 +308,7 @@ func (p *JsonParserPlugin) deepCopyChatStreamResponseChoiceDelta(original *schem result := &schemas.ChatStreamResponseChoiceDelta{ Role: original.Role, - Thought: original.Thought, // Shallow copy + Reasoning: original.Reasoning, // Shallow copy Refusal: original.Refusal, // Shallow copy ToolCalls: original.ToolCalls, // Shallow copy - we don't modify tool calls } diff --git a/plugins/maxim/main.go b/plugins/maxim/main.go index 4cc977dc3..c78a49a81 100644 --- a/plugins/maxim/main.go +++ b/plugins/maxim/main.go @@ -470,10 +470,23 @@ func (plugin *Plugin) PostHook(ctx *schemas.BifrostContext, result *schemas.Bifr generationID, ok := (*ctx).Value(GenerationIDKey).(string) if ok { if bifrostErr != nil { + // Safely extract message from nested error + message := "" + code := "" + errorType := "" + if bifrostErr.Error != nil { + message = bifrostErr.Error.Message + if bifrostErr.Error.Code != nil { + code = *bifrostErr.Error.Code + } + if bifrostErr.Error.Type != nil { + errorType = *bifrostErr.Error.Type + } + } genErr := logging.GenerationError{ - Message: bifrostErr.Error.Message, - Code: bifrostErr.Error.Code, - Type: bifrostErr.Error.Type, + Message: message, + Code: &code, + Type: &errorType, } logger.SetGenerationError(generationID, &genErr) diff --git a/plugins/semanticcache/utils.go b/plugins/semanticcache/utils.go index 8b83d053f..08b37e2d1 100644 --- a/plugins/semanticcache/utils.go +++ b/plugins/semanticcache/utils.go @@ -666,8 +666,8 @@ func (plugin *Plugin) extractChatParametersToMetadata(params *schemas.ChatParame if params.PromptCacheKey != nil { metadata["prompt_cache_key"] = *params.PromptCacheKey } - if params.ReasoningEffort != nil { - metadata["reasoning_effort"] = *params.ReasoningEffort + if params.Reasoning != nil && params.Reasoning.Effort != nil { + 
metadata["reasoning_effort"] = *params.Reasoning.Effort } if params.ResponseFormat != nil { metadata["response_format"] = params.ResponseFormat @@ -749,6 +749,9 @@ func (plugin *Plugin) extractResponsesParametersToMetadata(params *schemas.Respo if params.Reasoning.Effort != nil { metadata["reasoning_effort"] = *params.Reasoning.Effort } + if params.Reasoning.MaxTokens != nil { + metadata["reasoning_max_tokens"] = *params.Reasoning.MaxTokens + } if params.Reasoning.Summary != nil { metadata["reasoning_summary"] = *params.Reasoning.Summary } diff --git a/transports/bifrost-http/handlers/inference.go b/transports/bifrost-http/handlers/inference.go index ecdf91140..bed9ca07e 100644 --- a/transports/bifrost-http/handlers/inference.go +++ b/transports/bifrost-http/handlers/inference.go @@ -74,7 +74,7 @@ var chatParamsKnownFields = map[string]bool{ "parallel_tool_calls": true, "presence_penalty": true, "prompt_cache_key": true, - "reasoning_effort": true, + "reasoning": true, "response_format": true, "safety_identifier": true, "service_tier": true, @@ -166,6 +166,38 @@ type ChatRequest struct { *schemas.ChatParameters } +// UnmarshalJSON implements custom JSON unmarshalling for ChatRequest. +// This is needed because ChatParameters has a custom UnmarshalJSON method, +// which interferes with sonic's handling of the embedded BifrostParams struct. 
+func (cr *ChatRequest) UnmarshalJSON(data []byte) error { + // First, unmarshal BifrostParams fields directly + type bifrostAlias BifrostParams + var bp bifrostAlias + if err := sonic.Unmarshal(data, &bp); err != nil { + return err + } + cr.BifrostParams = BifrostParams(bp) + + // Unmarshal messages + var msgStruct struct { + Messages []schemas.ChatMessage `json:"messages"` + } + if err := sonic.Unmarshal(data, &msgStruct); err != nil { + return err + } + cr.Messages = msgStruct.Messages + + // Unmarshal ChatParameters (which has its own custom unmarshaller) + if cr.ChatParameters == nil { + cr.ChatParameters = &schemas.ChatParameters{} + } + if err := sonic.Unmarshal(data, cr.ChatParameters); err != nil { + return err + } + + return nil +} + // ResponsesRequestInput is a union of string and array of responses messages type ResponsesRequestInput struct { ResponsesRequestInputStr *string @@ -236,7 +268,7 @@ func parseFallbacks(fallbackStrings []string) ([]schemas.Fallback, error) { func extractExtraParams(data []byte, knownFields map[string]bool) (map[string]interface{}, error) { // Parse JSON to extract unknown fields var rawData map[string]json.RawMessage - if err := json.Unmarshal(data, &rawData); err != nil { + if err := sonic.Unmarshal(data, &rawData); err != nil { return nil, err } @@ -245,7 +277,7 @@ func extractExtraParams(data []byte, knownFields map[string]bool) (map[string]in for key, value := range rawData { if !knownFields[key] { var v interface{} - if err := json.Unmarshal(value, &v); err != nil { + if err := sonic.Unmarshal(value, &v); err != nil { continue // Skip fields that can't be unmarshaled } extraParams[key] = v @@ -441,7 +473,9 @@ func (h *CompletionHandler) textCompletion(ctx *fasthttp.RequestCtx) { // chatCompletion handles POST /v1/chat/completions - Process chat completion requests func (h *CompletionHandler) chatCompletion(ctx *fasthttp.RequestCtx) { - var req ChatRequest + req := ChatRequest{ + ChatParameters: 
&schemas.ChatParameters{}, + } if err := sonic.Unmarshal(ctx.PostBody(), &req); err != nil { SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("Invalid request format: %v", err)) return diff --git a/ui/app/workspace/logs/views/logChatMessageView.tsx b/ui/app/workspace/logs/views/logChatMessageView.tsx index 93cc9b042..d7424ff3d 100644 --- a/ui/app/workspace/logs/views/logChatMessageView.tsx +++ b/ui/app/workspace/logs/views/logChatMessageView.tsx @@ -70,23 +70,25 @@ export default function LogChatMessageView({ message }: LogChatMessageViewProps) {message.tool_call_id && Tool Call ID: {message.tool_call_id}} - {/* Handle thought content */} - {message.thought && ( + {/* Handle reasoning content */} + {message.reasoning && (