Commit 4f289b9

feat: anthropic integration convertor enhancements
1 parent fb3bf4b commit 4f289b9

11 files changed: 294 additions, 413 deletions

core/providers/anthropic/responses.go

Lines changed: 55 additions & 6 deletions
@@ -753,6 +753,29 @@ func (chunk *AnthropicStreamEvent) ToBifrostResponsesStream(ctx context.Context,
     if chunk.Delta.StopReason != nil {
         state.StopReason = schemas.Ptr(ConvertAnthropicFinishReasonToBifrost(*chunk.Delta.StopReason))
     }
+    // if ctx.Value(schemas.BifrostContextKeyIntegrationType) == "anthropic" {
+    //     var usage *schemas.ResponsesResponseUsage
+    //     if chunk.Usage != nil {
+    //         usage = &schemas.ResponsesResponseUsage{
+    //             InputTokens:  chunk.Usage.InputTokens,
+    //             OutputTokens: chunk.Usage.OutputTokens,
+    //             TotalTokens:  chunk.Usage.InputTokens + chunk.Usage.OutputTokens,
+    //             InputTokensDetails: &schemas.ResponsesResponseInputTokens{
+    //                 CachedTokens: chunk.Usage.CacheReadInputTokens,
+    //             },
+    //             OutputTokensDetails: &schemas.ResponsesResponseOutputTokens{
+    //                 CachedTokens: chunk.Usage.CacheCreationInputTokens,
+    //             },
+    //         }
+    //     }
+    //     return []*schemas.BifrostResponsesStreamResponse{{
+    //         Type:           "anthropic.message_delta",
+    //         SequenceNumber: sequenceNumber,
+    //         Response: &schemas.BifrostResponsesResponse{
+    //             Usage: usage,
+    //         },
+    //     }}, nil, false
+    // }
     // Message-level updates (like stop reason, usage, etc.)
     // Note: We don't emit output_item.done here because items are already closed
     // by content_block_stop. This event is informational only.

@@ -808,7 +831,7 @@ func (chunk *AnthropicStreamEvent) ToBifrostResponsesStream(ctx context.Context,
 }
 
 // ToAnthropicResponsesStreamResponse converts a Bifrost Responses stream response to Anthropic SSE string format
-func ToAnthropicResponsesStreamResponse(bifrostResp *schemas.BifrostResponsesStreamResponse) []*AnthropicStreamEvent {
+func ToAnthropicResponsesStreamResponse(ctx context.Context, bifrostResp *schemas.BifrostResponsesStreamResponse) []*AnthropicStreamEvent {
     if bifrostResp == nil {
         return nil
     }

@@ -1189,6 +1212,17 @@ func ToAnthropicResponsesStreamResponse(bifrostResp *schemas.BifrostResponsesStr
            }
        }
 
+    // case "anthropic.message_delta":
+    //     if ctx.Value(schemas.BifrostContextKeyIntegrationType) == "anthropic" {
+    //         streamResp.Type = AnthropicStreamEventTypeMessageDelta
+    //         if bifrostResp.Response != nil && bifrostResp.Response.Usage != nil {
+    //             streamResp.Usage = &AnthropicUsage{
+    //                 InputTokens:  bifrostResp.Response.Usage.InputTokens,
+    //                 OutputTokens: bifrostResp.Response.Usage.OutputTokens,
+    //             }
+    //         }
+    //     }
+
     default:
         // Unknown event type, return empty
         return nil

@@ -1230,16 +1264,13 @@ func (request *AnthropicMessageRequest) ToBifrostResponsesRequest() *schemas.Bif
     if request.StopSequences != nil {
         params.ExtraParams["stop"] = request.StopSequences
     }
-    if request.Thinking != nil {
-        params.ExtraParams["thinking"] = request.Thinking
-    }
     if request.OutputFormat != nil {
         params.Text = convertAnthropicOutputFormatToResponsesTextConfig(request.OutputFormat)
     }
     if request.Thinking != nil {
         if request.Thinking.Type == "enabled" {
             params.Reasoning = &schemas.ResponsesParametersReasoning{
-                Effort:    schemas.Ptr("auto"),
+                Effort:    schemas.Ptr("medium"), // TODO: add a relative measure with budget tokens and max tokens
                 MaxTokens: request.Thinking.BudgetTokens,
             }
         } else {

@@ -1616,6 +1647,23 @@ func ConvertBifrostMessagesToAnthropicMessages(bifrostMessages []schemas.Respons
             continue
         }
 
+        // If there are pending reasoning blocks and this is a user message,
+        // flush them into a separate assistant message first
+        // (thinking blocks can only appear in assistant messages in Anthropic)
+        if len(pendingReasoningContentBlocks) > 0 && (msg.Role == nil || *msg.Role == schemas.ResponsesInputMessageRoleUser) {
+            // Copy the pending reasoning content blocks
+            copied := make([]AnthropicContentBlock, len(pendingReasoningContentBlocks))
+            copy(copied, pendingReasoningContentBlocks)
+            assistantReasoningMsg := AnthropicMessage{
+                Role: AnthropicMessageRoleAssistant,
+                Content: AnthropicContent{
+                    ContentBlocks: copied,
+                },
+            }
+            anthropicMessages = append(anthropicMessages, assistantReasoningMsg)
+            pendingReasoningContentBlocks = nil
+        }
+
         // Regular user/assistant message
         anthropicMsg := convertBifrostMessageToAnthropicMessage(&msg, &pendingReasoningContentBlocks)
         if anthropicMsg != nil {

@@ -2072,7 +2120,8 @@ func convertBifrostMessageToAnthropicMessage(msg *schemas.ResponsesMessage, pend
     }
 
     // Add any pending reasoning content blocks to the message
-    if len(*pendingReasoningContentBlocks) > 0 {
+    // Only add reasoning blocks to assistant messages (thinking blocks can only appear in assistant messages in Anthropic)
+    if len(*pendingReasoningContentBlocks) > 0 && anthropicMsg.Role == AnthropicMessageRoleAssistant {
         // copy the pending reasoning content blocks
         copied := make([]AnthropicContentBlock, len(*pendingReasoningContentBlocks))
         copy(copied, *pendingReasoningContentBlocks)
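
Note: the Effort value in the Thinking hunk above is hard-coded to "medium", with a TODO to derive a relative level from the thinking budget. A minimal sketch of one possible mapping, using the ratio of BudgetTokens to the request's max tokens; effortFromBudget is a hypothetical helper and not part of this commit:

// effortFromBudget illustrates the TODO above: pick a reasoning effort level
// from how large the thinking budget is relative to the overall token budget.
func effortFromBudget(budgetTokens, maxTokens int) string {
    if budgetTokens <= 0 || maxTokens <= 0 {
        return "medium" // keep the current default when budgets are unknown
    }
    ratio := float64(budgetTokens) / float64(maxTokens)
    switch {
    case ratio < 0.25:
        return "low"
    case ratio < 0.6:
        return "medium"
    default:
        return "high"
    }
}

The result could then replace the hard-coded schemas.Ptr("medium") when both budgets are present on the request.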

core/providers/anthropic/types.go

Lines changed: 3 additions & 3 deletions
@@ -298,9 +298,9 @@ type AnthropicTextResponse struct {
 // AnthropicUsage represents usage information in Anthropic format
 type AnthropicUsage struct {
     InputTokens              int                         `json:"input_tokens"`
-    CacheCreationInputTokens int                         `json:"cache_creation_input_tokens,omitempty"`
-    CacheReadInputTokens     int                         `json:"cache_read_input_tokens,omitempty"`
-    CacheCreation            AnthropicUsageCacheCreation `json:"cache_creation,omitempty"`
+    CacheCreationInputTokens int                         `json:"cache_creation_input_tokens"`
+    CacheReadInputTokens     int                         `json:"cache_read_input_tokens"`
+    CacheCreation            AnthropicUsageCacheCreation `json:"cache_creation"`
     OutputTokens             int                         `json:"output_tokens"`
 }
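
Note: removing omitempty means the cache fields are now always serialized, even when zero, so consumers of Anthropic-shaped usage see explicit zero counts instead of missing keys. A minimal, standalone illustration of that encoding/json behavior with a trimmed stand-in struct (only the JSON tags are taken from the real type):

package main

import (
    "encoding/json"
    "fmt"
)

// usage is a trimmed stand-in for AnthropicUsage, keeping only the relevant tags.
type usage struct {
    InputTokens              int `json:"input_tokens"`
    CacheCreationInputTokens int `json:"cache_creation_input_tokens"` // omitempty removed
    CacheReadInputTokens     int `json:"cache_read_input_tokens"`     // omitempty removed
    OutputTokens             int `json:"output_tokens"`
}

func main() {
    b, _ := json.Marshal(usage{InputTokens: 12, OutputTokens: 34})
    fmt.Println(string(b))
    // Output: {"input_tokens":12,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":34}
}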

core/providers/bedrock/responses.go

Lines changed: 152 additions & 0 deletions
@@ -215,6 +215,158 @@ func (chunk *BedrockStreamEvent) ToBifrostResponsesStream(sequenceNumber int, st
     if chunk.Start.ToolUse != nil {
         var responses []*schemas.BifrostResponsesStreamResponse
 
+        // Close any open reasoning blocks first (Anthropic sends content_block_stop before starting new blocks)
+        for prevContentIndex := range state.ReasoningContentIndices {
+            prevOutputIndex, prevExists := state.ContentIndexToOutputIndex[prevContentIndex]
+            if !prevExists {
+                continue
+            }
+
+            // Skip already completed output indices
+            if state.CompletedOutputIndices[prevOutputIndex] {
+                continue
+            }
+
+            itemID := state.ItemIDs[prevOutputIndex]
+
+            // For reasoning items, content_index is always 0
+            reasoningContentIndex := 0
+
+            // Emit reasoning_summary_text.done
+            emptyText := ""
+            reasoningDoneResponse := &schemas.BifrostResponsesStreamResponse{
+                Type:           schemas.ResponsesStreamResponseTypeReasoningSummaryTextDone,
+                SequenceNumber: sequenceNumber + len(responses),
+                OutputIndex:    schemas.Ptr(prevOutputIndex),
+                ContentIndex:   &reasoningContentIndex,
+                Text:           &emptyText,
+            }
+            if itemID != "" {
+                reasoningDoneResponse.ItemID = &itemID
+            }
+            responses = append(responses, reasoningDoneResponse)
+
+            // Emit content_part.done for reasoning
+            partDoneResponse := &schemas.BifrostResponsesStreamResponse{
+                Type:           schemas.ResponsesStreamResponseTypeContentPartDone,
+                SequenceNumber: sequenceNumber + len(responses),
+                OutputIndex:    schemas.Ptr(prevOutputIndex),
+                ContentIndex:   &reasoningContentIndex,
+            }
+            if itemID != "" {
+                partDoneResponse.ItemID = &itemID
+            }
+            responses = append(responses, partDoneResponse)
+
+            // Emit output_item.done for reasoning
+            statusCompleted := "completed"
+            doneItem := &schemas.ResponsesMessage{
+                Status: &statusCompleted,
+            }
+            if itemID != "" {
+                doneItem.ID = &itemID
+            }
+            responses = append(responses, &schemas.BifrostResponsesStreamResponse{
+                Type:           schemas.ResponsesStreamResponseTypeOutputItemDone,
+                SequenceNumber: sequenceNumber + len(responses),
+                OutputIndex:    schemas.Ptr(prevOutputIndex),
+                ContentIndex:   &reasoningContentIndex,
+                Item:           doneItem,
+            })
+
+            // Mark this output index as completed
+            state.CompletedOutputIndices[prevOutputIndex] = true
+        }
+        // Clear reasoning content indices after closing them
+        clear(state.ReasoningContentIndices)
+
+        // Close any open tool call blocks before starting a new one (Anthropic completes each block before starting next)
+        for prevContentIndex, prevOutputIndex := range state.ContentIndexToOutputIndex {
+            // Skip reasoning blocks (already handled above)
+            if state.ReasoningContentIndices[prevContentIndex] {
+                continue
+            }
+
+            // Skip already completed output indices
+            if state.CompletedOutputIndices[prevOutputIndex] {
+                continue
+            }
+
+            // Check if this is a tool call
+            prevToolCallID := state.ToolCallIDs[prevOutputIndex]
+            if prevToolCallID == "" {
+                continue // Not a tool call
+            }
+
+            prevItemID := state.ItemIDs[prevOutputIndex]
+            prevToolName := state.ToolCallNames[prevOutputIndex]
+            accumulatedArgs := state.ToolArgumentBuffers[prevOutputIndex]
+
+            // Emit content_part.done for tool call
+            responses = append(responses, &schemas.BifrostResponsesStreamResponse{
+                Type:           schemas.ResponsesStreamResponseTypeContentPartDone,
+                SequenceNumber: sequenceNumber + len(responses),
+                OutputIndex:    schemas.Ptr(prevOutputIndex),
+                ContentIndex:   schemas.Ptr(prevContentIndex),
+                ItemID:         &prevItemID,
+            })
+
+            // Emit function_call_arguments.done with full arguments
+            if accumulatedArgs != "" {
+                var doneItem *schemas.ResponsesMessage
+                if prevToolCallID != "" || prevToolName != "" {
+                    doneItem = &schemas.ResponsesMessage{
+                        ResponsesToolMessage: &schemas.ResponsesToolMessage{},
+                    }
+                    if prevToolCallID != "" {
+                        doneItem.ResponsesToolMessage.CallID = &prevToolCallID
+                    }
+                    if prevToolName != "" {
+                        doneItem.ResponsesToolMessage.Name = &prevToolName
+                    }
+                }
+
+                argsDoneResponse := &schemas.BifrostResponsesStreamResponse{
+                    Type:           schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDone,
+                    SequenceNumber: sequenceNumber + len(responses),
+                    OutputIndex:    schemas.Ptr(prevOutputIndex),
+                    Arguments:      &accumulatedArgs,
+                }
+                if prevItemID != "" {
+                    argsDoneResponse.ItemID = &prevItemID
+                }
+                if doneItem != nil {
+                    argsDoneResponse.Item = doneItem
+                }
+                responses = append(responses, argsDoneResponse)
+            }
+
+            // Emit output_item.done for tool call
+            statusCompleted := "completed"
+            toolDoneItem := &schemas.ResponsesMessage{
+                ID:     &prevItemID,
+                Type:   schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
+                Status: &statusCompleted,
+                ResponsesToolMessage: &schemas.ResponsesToolMessage{
+                    CallID:    &prevToolCallID,
+                    Name:      &prevToolName,
+                    Arguments: &accumulatedArgs,
+                },
+            }
+
+            responses = append(responses, &schemas.BifrostResponsesStreamResponse{
+                Type:           schemas.ResponsesStreamResponseTypeOutputItemDone,
+                SequenceNumber: sequenceNumber + len(responses),
+                OutputIndex:    schemas.Ptr(prevOutputIndex),
+                ContentIndex:   schemas.Ptr(prevContentIndex),
+                ItemID:         &prevItemID,
+                Item:           toolDoneItem,
+            })
+
+            // Mark this output index as completed
+            state.CompletedOutputIndices[prevOutputIndex] = true
+        }
+
         // Create new output index for this tool use
         outputIndex := state.CurrentOutputIndex
         state.ContentIndexToOutputIndex[contentBlockIndex] = outputIndex
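
Note: the added block closes any still-open reasoning and tool-call items before a new tool_use block starts, mirroring Anthropic's behavior of finishing one content block before opening the next. For reference, the close-out events are emitted in a fixed order; a small summary using the same stream-type constants as above (the []any element type is used here only to avoid assuming the constants' concrete type; this snippet is not part of the diff):

// Close-out order for a dangling reasoning item, then for a dangling tool call,
// as produced by the block above.
var reasoningCloseOrder = []any{
    schemas.ResponsesStreamResponseTypeReasoningSummaryTextDone,
    schemas.ResponsesStreamResponseTypeContentPartDone,
    schemas.ResponsesStreamResponseTypeOutputItemDone,
}

var toolCallCloseOrder = []any{
    schemas.ResponsesStreamResponseTypeContentPartDone,
    schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDone, // only when arguments were accumulated
    schemas.ResponsesStreamResponseTypeOutputItemDone,
}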

core/providers/openai/openai.go

Lines changed: 0 additions & 3 deletions
@@ -1154,9 +1154,6 @@ func HandleOpenAIResponsesRequest(
         return nil, bifrostErr
     }
 
-    fmt.Println("jsonData", string(jsonData))
-    fmt.Println("--------------------------------")
-
     req.SetBody(jsonData)
 
     // Make request

core/providers/utils/utils.go

Lines changed: 14 additions & 0 deletions
@@ -319,6 +319,20 @@ func HandleProviderAPIError(resp *fasthttp.Response, errorResp any) *schemas.Bif
     statusCode := resp.StatusCode()
     body := append([]byte(nil), resp.Body()...)
 
+    // decode body
+    decodedBody, err := CheckAndDecodeBody(resp)
+    if err != nil {
+        return &schemas.BifrostError{
+            IsBifrostError: false,
+            StatusCode:     &statusCode,
+            Error: &schemas.ErrorField{
+                Message: err.Error(),
+            },
+        }
+    }
+
+    body = decodedBody
+
     if err := sonic.Unmarshal(body, errorResp); err != nil {
         rawResponse := body
         message := fmt.Sprintf("provider API error: %s", string(rawResponse))
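
Note: CheckAndDecodeBody is called here but implemented elsewhere; its body is not part of this diff. A minimal sketch of what such a helper typically does with fasthttp, assuming it only needs to handle gzip-encoded provider responses (the name and exact behavior are taken from this call site, not from the real implementation):

// CheckAndDecodeBody returns the response body, decompressing it when the provider
// sent it gzip-encoded. Sketch only; relies on the fasthttp import already in this file.
func CheckAndDecodeBody(resp *fasthttp.Response) ([]byte, error) {
    if string(resp.Header.Peek("Content-Encoding")) == "gzip" {
        return resp.BodyGunzip() // fasthttp handles the gzip decompression
    }
    return append([]byte(nil), resp.Body()...), nil
}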

core/providers/vertex/errors.go

Lines changed: 10 additions & 4 deletions
@@ -10,15 +10,21 @@ import (
 func parseVertexError(providerName schemas.ModelProvider, resp *fasthttp.Response) *schemas.BifrostError {
     var openAIErr schemas.BifrostError
     var vertexErr []VertexError
-    if err := sonic.Unmarshal(resp.Body(), &openAIErr); err != nil || openAIErr.Error == nil {
+
+    decodedBody, err := providerUtils.CheckAndDecodeBody(resp)
+    if err != nil {
+        return providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseDecode, err, providerName)
+    }
+
+    if err := sonic.Unmarshal(decodedBody, &openAIErr); err != nil || openAIErr.Error == nil {
         // Try Vertex error format if OpenAI format fails or is incomplete
-        if err := sonic.Unmarshal(resp.Body(), &vertexErr); err != nil {
+        if err := sonic.Unmarshal(decodedBody, &vertexErr); err != nil {
             //try with single Vertex error format
             var vertexErr VertexError
-            if err := sonic.Unmarshal(resp.Body(), &vertexErr); err != nil {
+            if err := sonic.Unmarshal(decodedBody, &vertexErr); err != nil {
                 // Try VertexValidationError format (validation errors from Mistral endpoint)
                 var validationErr VertexValidationError
-                if err := sonic.Unmarshal(resp.Body(), &validationErr); err != nil {
+                if err := sonic.Unmarshal(decodedBody, &validationErr); err != nil {
                     return providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err, providerName)
                 }
                 if len(validationErr.Detail) > 0 {

core/schemas/bifrost.go

Lines changed: 1 addition & 0 deletions
@@ -117,6 +117,7 @@ const (
     BifrostContextKeyUseRawRequestBody                   BifrostContextKey = "bifrost-use-raw-request-body"
     BifrostContextKeySendBackRawRequest                  BifrostContextKey = "bifrost-send-back-raw-request"  // bool
     BifrostContextKeySendBackRawResponse                 BifrostContextKey = "bifrost-send-back-raw-response" // bool
+    BifrostContextKeyIntegrationType                     BifrostContextKey = "bifrost-integration-type"       // RouteConfigType
     BifrostContextKeyIsResponsesToChatCompletionFallback BifrostContextKey = "bifrost-is-responses-to-chat-completion-fallback" // bool (set by bifrost)
 )
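
Note: this new key is what the commented-out checks in anthropic/responses.go read via ctx.Value(schemas.BifrostContextKeyIntegrationType). A minimal sketch of how an integration layer might set it before calling the converters; everything except the schemas identifiers and ToAnthropicResponsesStreamResponse is a placeholder, and whether the stored value is a plain string or a RouteConfigType is an assumption:

// Tag the request context with the originating integration so provider
// converters can branch on it later. Sketch only.
ctx := context.WithValue(context.Background(), schemas.BifrostContextKeyIntegrationType, "anthropic")
events := ToAnthropicResponsesStreamResponse(ctx, bifrostResp) // updated signature takes ctx first
_ = events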
