diff --git a/core/changelog.md b/core/changelog.md index e69de29bb..49c69d7c9 100644 --- a/core/changelog.md +++ b/core/changelog.md @@ -0,0 +1 @@ +- feat: add document/file support for Anthropic, Bedrock, and Gemini \ No newline at end of file diff --git a/core/providers/anthropic/chat.go b/core/providers/anthropic/chat.go index 335a08f13..50cb24fac 100644 --- a/core/providers/anthropic/chat.go +++ b/core/providers/anthropic/chat.go @@ -251,6 +251,8 @@ func ToAnthropicChatRequest(bifrostReq *schemas.BifrostChatRequest) (*AnthropicM }) } else if block.ImageURLStruct != nil { content = append(content, ConvertToAnthropicImageBlock(block)) + } else if block.File != nil { + content = append(content, ConvertToAnthropicDocumentBlock(block)) } } } diff --git a/core/providers/anthropic/responses.go b/core/providers/anthropic/responses.go index a8022a57c..1c8803831 100644 --- a/core/providers/anthropic/responses.go +++ b/core/providers/anthropic/responses.go @@ -2205,6 +2205,22 @@ func convertAnthropicContentBlocksToResponsesMessagesGrouped(contentBlocks []Ant bifrostMessages = append(bifrostMessages, bifrostMsg) } + case AnthropicContentBlockTypeDocument: + // Handle document blocks similar to images + if block.Source != nil { + bifrostMsg := schemas.ResponsesMessage{ + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Role: role, + Content: &schemas.ResponsesMessageContent{ + ContentBlocks: []schemas.ResponsesMessageContentBlock{block.toBifrostResponsesDocumentBlock()}, + }, + } + if isOutputMessage { + bifrostMsg.ID = schemas.Ptr("msg_" + utils.GetRandomString(50)) + } + bifrostMessages = append(bifrostMessages, bifrostMsg) + } + case AnthropicContentBlockTypeThinking: if block.Thinking != nil { bifrostMsg := schemas.ResponsesMessage{ @@ -2413,6 +2429,20 @@ func convertAnthropicContentBlocksToResponsesMessages(contentBlocks []AnthropicC } bifrostMessages = append(bifrostMessages, bifrostMsg) } + case AnthropicContentBlockTypeDocument: + if block.Source != nil { + 
bifrostMsg := schemas.ResponsesMessage{ + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Role: role, + Content: &schemas.ResponsesMessageContent{ + ContentBlocks: []schemas.ResponsesMessageContentBlock{block.toBifrostResponsesDocumentBlock()}, + }, + } + if isOutputMessage { + bifrostMsg.ID = schemas.Ptr("msg_" + utils.GetRandomString(50)) + } + bifrostMessages = append(bifrostMessages, bifrostMsg) + } case AnthropicContentBlockTypeThinking: if block.Thinking != nil { // Collect reasoning blocks to create a single reasoning message @@ -3353,6 +3383,15 @@ func convertContentBlockToAnthropic(block schemas.ResponsesMessageContentBlock) anthropicBlock := ConvertToAnthropicImageBlock(chatBlock) return &anthropicBlock } + case schemas.ResponsesInputMessageContentBlockTypeFile: + if block.ResponsesInputMessageContentBlockFile != nil { + // Direct conversion without intermediate ChatContentBlock + anthropicBlock := ConvertResponsesFileBlockToAnthropic( + block.ResponsesInputMessageContentBlockFile, + block.CacheControl, + ) + return &anthropicBlock + } case schemas.ResponsesOutputMessageContentTypeReasoning: if block.Text != nil { return &AnthropicContentBlock{ @@ -3392,6 +3431,51 @@ func (block AnthropicContentBlock) toBifrostResponsesImageBlock() schemas.Respon } } +func (block AnthropicContentBlock) toBifrostResponsesDocumentBlock() schemas.ResponsesMessageContentBlock { + resultBlock := schemas.ResponsesMessageContentBlock{ + Type: schemas.ResponsesInputMessageContentBlockTypeFile, + CacheControl: block.CacheControl, + ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{}, + } + + // Set filename from title if available + if block.Title != nil { + resultBlock.ResponsesInputMessageContentBlockFile.Filename = block.Title + } + + if block.Source == nil { + return resultBlock + } + + // Handle different source types + switch block.Source.Type { + case "url": + // URL source + if block.Source.URL != nil { + 
resultBlock.ResponsesInputMessageContentBlockFile.FileURL = block.Source.URL + } + case "base64": + // Base64 encoded data + if block.Source.Data != nil { + // Construct data URL with media type + mediaType := "application/pdf" + if block.Source.MediaType != nil { + mediaType = *block.Source.MediaType + } + dataURL := "data:" + mediaType + ";base64," + *block.Source.Data + resultBlock.ResponsesInputMessageContentBlockFile.FileData = &dataURL + } + case "text": + // Plain text source + if block.Source.Data != nil { + resultBlock.ResponsesInputMessageContentBlockFile.FileType = schemas.Ptr("text/plain") + resultBlock.ResponsesInputMessageContentBlockFile.FileData = block.Source.Data + } + } + + return resultBlock +} + // Helper functions for MCP tool/server conversion // convertAnthropicMCPServerToBifrostTool converts a single Anthropic MCP server to a Bifrost ResponsesTool func convertAnthropicMCPServerToBifrostTool(mcpServer *AnthropicMCPServer) *schemas.ResponsesTool { diff --git a/core/providers/anthropic/types.go b/core/providers/anthropic/types.go index f6976df25..b1c71a496 100644 --- a/core/providers/anthropic/types.go +++ b/core/providers/anthropic/types.go @@ -203,6 +203,7 @@ type AnthropicContentBlockType string const ( AnthropicContentBlockTypeText AnthropicContentBlockType = "text" AnthropicContentBlockTypeImage AnthropicContentBlockType = "image" + AnthropicContentBlockTypeDocument AnthropicContentBlockType = "document" AnthropicContentBlockTypeToolUse AnthropicContentBlockType = "tool_use" AnthropicContentBlockTypeServerToolUse AnthropicContentBlockType = "server_tool_use" AnthropicContentBlockTypeToolResult AnthropicContentBlockType = "tool_result" @@ -215,7 +216,7 @@ const ( // AnthropicContentBlock represents content in Anthropic message format type AnthropicContentBlock struct { - Type AnthropicContentBlockType `json:"type"` // "text", "image", "tool_use", "tool_result", "thinking" + Type AnthropicContentBlockType `json:"type"` // "text", "image", 
"document", "tool_use", "tool_result", "thinking" Text *string `json:"text,omitempty"` // For text content Thinking *string `json:"thinking,omitempty"` // For thinking content Signature *string `json:"signature,omitempty"` // For signature content @@ -226,16 +227,24 @@ type AnthropicContentBlock struct { Input any `json:"input,omitempty"` // For tool_use content ServerName *string `json:"server_name,omitempty"` // For mcp_tool_use content Content *AnthropicContent `json:"content,omitempty"` // For tool_result content - Source *AnthropicImageSource `json:"source,omitempty"` // For image content + Source *AnthropicSource `json:"source,omitempty"` // For image/document content CacheControl *schemas.CacheControl `json:"cache_control,omitempty"` // For cache control content + Citations *AnthropicCitationsConfig `json:"citations,omitempty"` // For document content + Context *string `json:"context,omitempty"` // For document content + Title *string `json:"title,omitempty"` // For document content } -// AnthropicImageSource represents image source in Anthropic format -type AnthropicImageSource struct { - Type string `json:"type"` // "base64" or "url" - MediaType *string `json:"media_type,omitempty"` // "image/jpeg", "image/png", etc. - Data *string `json:"data,omitempty"` // Base64-encoded image data - URL *string `json:"url,omitempty"` // URL of the image +// AnthropicSource represents image or document source in Anthropic format +type AnthropicSource struct { + Type string `json:"type"` // "base64", "url", "text", "content_block" + MediaType *string `json:"media_type,omitempty"` // "image/jpeg", "image/png", "application/pdf", etc. 
+ Data *string `json:"data,omitempty"` // Base64-encoded data (for base64 type) + URL *string `json:"url,omitempty"` // URL (for url type) +} + +// AnthropicCitationsConfig represents citations configuration for documents +type AnthropicCitationsConfig struct { + Enabled bool `json:"enabled"` } // AnthropicImageContent represents image content in Anthropic format diff --git a/core/providers/anthropic/utils.go b/core/providers/anthropic/utils.go index f7925d4c9..84f4cf1a1 100644 --- a/core/providers/anthropic/utils.go +++ b/core/providers/anthropic/utils.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "github.com/bytedance/sonic" providerUtils "github.com/maximhq/bifrost/core/providers/utils" @@ -97,7 +98,7 @@ func ConvertToAnthropicImageBlock(block schemas.ChatContentBlock) AnthropicConte imageBlock := AnthropicContentBlock{ Type: AnthropicContentBlockTypeImage, CacheControl: block.CacheControl, - Source: &AnthropicImageSource{}, + Source: &AnthropicSource{}, } if block.ImageURLStruct == nil { @@ -148,6 +149,149 @@ func ConvertToAnthropicImageBlock(block schemas.ChatContentBlock) AnthropicConte return imageBlock } +// ConvertToAnthropicDocumentBlock converts a Bifrost file block to Anthropic document format +func ConvertToAnthropicDocumentBlock(block schemas.ChatContentBlock) AnthropicContentBlock { + documentBlock := AnthropicContentBlock{ + Type: AnthropicContentBlockTypeDocument, + CacheControl: block.CacheControl, + Source: &AnthropicSource{}, + } + + if block.File == nil { + return documentBlock + } + + file := block.File + + // Set title if provided + if file.Filename != nil { + documentBlock.Title = file.Filename + } + + // Handle file_data (base64 encoded data) + if file.FileData != nil && *file.FileData != "" { + fileData := *file.FileData + + // Check if it's plain text based on file type + if file.FileType != nil && (*file.FileType == "text/plain" || *file.FileType == "txt") { + documentBlock.Source.Type = "text" + 
documentBlock.Source.Data = &fileData + return documentBlock + } + + if strings.HasPrefix(fileData, "data:") { + urlTypeInfo := schemas.ExtractURLTypeInfo(fileData) + + if urlTypeInfo.DataURLWithoutPrefix != nil { + // It's a data URL, extract the base64 content + documentBlock.Source.Type = "base64" + documentBlock.Source.Data = urlTypeInfo.DataURLWithoutPrefix + + // Set media type from data URL or file type + if urlTypeInfo.MediaType != nil { + documentBlock.Source.MediaType = urlTypeInfo.MediaType + } else if file.FileType != nil { + documentBlock.Source.MediaType = file.FileType + } + return documentBlock + } + } + + // Default to base64 for binary files + documentBlock.Source.Type = "base64" + documentBlock.Source.Data = &fileData + + // Set media type + if file.FileType != nil { + documentBlock.Source.MediaType = file.FileType + } else { + // Default to PDF if not specified + mediaType := "application/pdf" + documentBlock.Source.MediaType = &mediaType + } + return documentBlock + } + + return documentBlock +} + +// ConvertResponsesFileBlockToAnthropic converts a Responses file block directly to Anthropic document format +func ConvertResponsesFileBlockToAnthropic(fileBlock *schemas.ResponsesInputMessageContentBlockFile, cacheControl *schemas.CacheControl) AnthropicContentBlock { + documentBlock := AnthropicContentBlock{ + Type: AnthropicContentBlockTypeDocument, + CacheControl: cacheControl, + Source: &AnthropicSource{}, + } + + if fileBlock == nil { + return documentBlock + } + + // Set title if provided + if fileBlock.Filename != nil { + documentBlock.Title = fileBlock.Filename + } + + // Handle file_data (base64 encoded data or plain text) + if fileBlock.FileData != nil && *fileBlock.FileData != "" { + fileData := *fileBlock.FileData + + // Check if it's plain text based on file type + if fileBlock.FileType != nil && (*fileBlock.FileType == "text/plain" || *fileBlock.FileType == "txt") { + documentBlock.Source.Type = "text" + documentBlock.Source.Data = 
&fileData + documentBlock.Source.MediaType = schemas.Ptr("text/plain") + return documentBlock + } + + // Check if it's a data URL (e.g., "data:application/pdf;base64,...") + if strings.HasPrefix(fileData, "data:") { + urlTypeInfo := schemas.ExtractURLTypeInfo(fileData) + + if urlTypeInfo.DataURLWithoutPrefix != nil { + // It's a data URL, extract the base64 content + documentBlock.Source.Type = "base64" + documentBlock.Source.Data = urlTypeInfo.DataURLWithoutPrefix + + // Set media type from data URL or file type + if urlTypeInfo.MediaType != nil { + documentBlock.Source.MediaType = urlTypeInfo.MediaType + } else if fileBlock.FileType != nil { + documentBlock.Source.MediaType = fileBlock.FileType + } + return documentBlock + } + } + + // Default to base64 for binary files (raw base64 without prefix) + documentBlock.Source.Type = "base64" + documentBlock.Source.Data = &fileData + + // Set media type + if fileBlock.FileType != nil { + documentBlock.Source.MediaType = fileBlock.FileType + } else { + // Default to PDF if not specified + mediaType := "application/pdf" + documentBlock.Source.MediaType = &mediaType + } + return documentBlock + } + + // Handle file URL + if fileBlock.FileURL != nil && *fileBlock.FileURL != "" { + documentBlock.Source.Type = "url" + documentBlock.Source.URL = fileBlock.FileURL + + if fileBlock.FileType != nil { + documentBlock.Source.MediaType = fileBlock.FileType + } + return documentBlock + } + + return documentBlock +} + func (block AnthropicContentBlock) ToBifrostContentImageBlock() schemas.ChatContentBlock { return schemas.ChatContentBlock{ Type: schemas.ChatContentBlockTypeImage, diff --git a/core/providers/bedrock/chat.go b/core/providers/bedrock/chat.go index 326de66af..f3ead7309 100644 --- a/core/providers/bedrock/chat.go +++ b/core/providers/bedrock/chat.go @@ -125,6 +125,44 @@ func (response *BedrockConverseResponse) ToBifrostChatResponse(ctx context.Conte }) reasoningText += contentBlock.ReasoningContent.ReasoningText.Text + 
"\n" } + + // Handle document content + if contentBlock.Document != nil { + fileBlock := schemas.ChatContentBlock{ + Type: schemas.ChatContentBlockTypeFile, + File: &schemas.ChatInputFile{}, + } + + // Set filename from document name + if contentBlock.Document.Name != "" { + fileBlock.File.Filename = &contentBlock.Document.Name + } + + // Set file type based on format + if contentBlock.Document.Format != "" { + var fileType string + switch contentBlock.Document.Format { + case "pdf": + fileType = "application/pdf" + case "txt", "md", "html", "csv": + fileType = "text/plain" + default: + fileType = "application/pdf" + } + fileBlock.File.FileType = &fileType + } + + // Convert document source data + if contentBlock.Document.Source != nil { + if contentBlock.Document.Source.Bytes != nil { + fileBlock.File.FileData = contentBlock.Document.Source.Bytes + } else if contentBlock.Document.Source.Text != nil { + fileBlock.File.FileData = contentBlock.Document.Source.Text + } + } + + contentBlocks = append(contentBlocks, fileBlock) + } } } diff --git a/core/providers/bedrock/responses.go b/core/providers/bedrock/responses.go index 8975f29dd..2dcfb9db3 100644 --- a/core/providers/bedrock/responses.go +++ b/core/providers/bedrock/responses.go @@ -2,8 +2,10 @@ package bedrock import ( "context" + "encoding/base64" "encoding/json" "fmt" + "strings" "sync" "time" @@ -2461,6 +2463,22 @@ func ConvertBifrostMessagesToBedrockMessages(bifrostMessages []schemas.Responses // (they cannot exist alone in Bedrock without violating the constraint) } + // Merge consecutive messages with the same role + // This ensures document blocks are in the same message as text blocks (Bedrock requirement) + mergedMessages := []BedrockMessage{} + for i := 0; i < len(bedrockMessages); i++ { + currentMsg := bedrockMessages[i] + + // Merge any consecutive messages with the same role + for i+1 < len(bedrockMessages) && bedrockMessages[i+1].Role == currentMsg.Role { + i++ + currentMsg.Content = 
append(currentMsg.Content, bedrockMessages[i].Content...) + } + + mergedMessages = append(mergedMessages, currentMsg) + } + bedrockMessages = mergedMessages + return bedrockMessages, systemMessages, nil } @@ -2695,6 +2713,58 @@ func convertSingleBedrockMessageToBifrostMessages(ctx *context.Context, msg *Bed outputMessages = append(outputMessages, toolMsg) } + } else if block.Document != nil { + // Document content + role := convertBedrockRoleToBifrostRole(msg.Role) + + // Convert document to file block + fileBlock := schemas.ResponsesMessageContentBlock{ + Type: schemas.ResponsesInputMessageContentBlockTypeFile, + ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{}, + } + + // Set filename from document name + if block.Document.Name != "" { + fileBlock.ResponsesInputMessageContentBlockFile.Filename = &block.Document.Name + } + + // Set file type based on format + if block.Document.Format != "" { + var fileType string + switch block.Document.Format { + case "pdf": + fileType = "application/pdf" + case "txt", "md", "html", "csv": + fileType = "text/plain" + default: + fileType = "application/pdf" // Default to PDF + } + fileBlock.ResponsesInputMessageContentBlockFile.FileType = &fileType + } + + // Convert document source data + if block.Document.Source != nil { + if block.Document.Source.Text != nil { + // Plain text content + fileBlock.ResponsesInputMessageContentBlockFile.FileData = block.Document.Source.Text + } else if block.Document.Source.Bytes != nil { + // Base64 encoded bytes (PDF) + fileBlock.ResponsesInputMessageContentBlockFile.FileData = block.Document.Source.Bytes + } + } + + bifrostMsg := schemas.ResponsesMessage{ + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Role: &role, + Content: &schemas.ResponsesMessageContent{ + ContentBlocks: []schemas.ResponsesMessageContentBlock{fileBlock}, + }, + } + if isOutputMessage { + bifrostMsg.ID = schemas.Ptr("msg_" + fmt.Sprintf("%d", time.Now().UnixNano())) + } + 
outputMessages = append(outputMessages, bifrostMsg) + } else if block.ToolResult != nil { // Tool result content - typically not in assistant output but handled for completeness // Prefer JSON payloads without unmarshalling; fallback to text @@ -2856,6 +2926,63 @@ func convertBifrostResponsesMessageContentBlocksToBedrockContentBlocks(content s }, } } + case schemas.ResponsesInputMessageContentBlockTypeFile: + if block.ResponsesInputMessageContentBlockFile != nil { + doc := &BedrockDocumentSource{ + Name: "document", // Default + Format: "pdf", // Default + Source: &BedrockDocumentSourceData{}, + } + + // Set filename + if block.ResponsesInputMessageContentBlockFile.Filename != nil { + doc.Name = *block.ResponsesInputMessageContentBlockFile.Filename + } + + // Determine format: text or PDF based on FileType + isTextFile := false + if block.ResponsesInputMessageContentBlockFile.FileType != nil { + fileType := *block.ResponsesInputMessageContentBlockFile.FileType + // Check if it's a text type + if strings.HasPrefix(fileType, "text/") || + fileType == "txt" || fileType == "md" || + fileType == "html" { + doc.Format = "txt" + isTextFile = true + } else if strings.Contains(fileType, "pdf") || fileType == "pdf" { + doc.Format = "pdf" + } + } + + // Handle file data + if block.ResponsesInputMessageContentBlockFile.FileData != nil { + fileData := *block.ResponsesInputMessageContentBlockFile.FileData + + // Check if it's a data URL (e.g., "data:application/pdf;base64,...") + if strings.HasPrefix(fileData, "data:") { + urlInfo := schemas.ExtractURLTypeInfo(fileData) + if urlInfo.DataURLWithoutPrefix != nil { + // PDF or other binary - keep as base64 + doc.Source.Bytes = urlInfo.DataURLWithoutPrefix + bedrockBlock.Document = doc + break + } + } + + // Not a data URL - use as-is + if isTextFile { + // bytes is necessary for bedrock + // base64 string of the text + doc.Source.Text = &fileData + encoded := base64.StdEncoding.EncodeToString([]byte(fileData)) + doc.Source.Bytes = 
&encoded + } else { + doc.Source.Bytes = &fileData + } + } + + bedrockBlock.Document = doc + } default: // Don't add anything for unknown types continue diff --git a/core/providers/bedrock/types.go b/core/providers/bedrock/types.go index d24b505c8..bb1677212 100644 --- a/core/providers/bedrock/types.go +++ b/core/providers/bedrock/types.go @@ -213,14 +213,15 @@ type BedrockImageSourceData struct { // BedrockDocumentSource represents document content type BedrockDocumentSource struct { - Format string `json:"format"` // Required: Document format (pdf, csv, doc, docx, xls, xlsx, html, txt, md) - Name string `json:"name"` // Required: Document name - Source BedrockDocumentSourceData `json:"source"` // Required: Document source data + Format string `json:"format"` // Required: Document format (pdf, csv, doc, docx, xls, xlsx, html, txt, md) + Name string `json:"name"` // Required: Document name + Source *BedrockDocumentSourceData `json:"source"` // Required: Document source data } // BedrockDocumentSourceData represents the source of document data type BedrockDocumentSourceData struct { Bytes *string `json:"bytes,omitempty"` // Base64-encoded document bytes + Text *string `json:"text,omitempty"` // Plain text content } // BedrockToolUse represents a tool use request diff --git a/core/providers/bedrock/utils.go b/core/providers/bedrock/utils.go index 5b4e00e54..bab0422c9 100644 --- a/core/providers/bedrock/utils.go +++ b/core/providers/bedrock/utils.go @@ -2,6 +2,7 @@ package bedrock import ( "context" + "encoding/base64" "encoding/json" "fmt" "strings" @@ -508,6 +509,66 @@ func convertContentBlock(block schemas.ChatContentBlock) ([]BedrockContentBlock, } return blocks, nil + case schemas.ChatContentBlockTypeFile: + if block.File == nil { + return nil, fmt.Errorf("file block missing file field") + } + + documentSource := &BedrockDocumentSource{ + Name: "document", + Format: "pdf", + Source: &BedrockDocumentSourceData{}, + } + + // Set filename + if block.File.Filename != 
nil { + documentSource.Name = *block.File.Filename + } + + // Convert MIME type to Bedrock format (pdf or txt) + isText := false + if block.File.FileType != nil { + fileType := *block.File.FileType + if fileType == "text/plain" || fileType == "txt" { + documentSource.Format = "txt" + isText = true + } else if strings.Contains(fileType, "pdf") || fileType == "pdf" { + documentSource.Format = "pdf" + } + } + + // Handle file data - strip data URL prefix if present + if block.File.FileData != nil { + fileData := *block.File.FileData + + // Check if it's a data URL and extract raw base64 + if strings.HasPrefix(fileData, "data:") { + urlInfo := schemas.ExtractURLTypeInfo(fileData) + if urlInfo.DataURLWithoutPrefix != nil { + documentSource.Source.Bytes = urlInfo.DataURLWithoutPrefix + return []BedrockContentBlock{ + { + Document: documentSource, + }, + }, nil + } + } + + // Set text or bytes based on file type + if isText { + documentSource.Source.Text = &fileData // Plain text + encoded := base64.StdEncoding.EncodeToString([]byte(fileData)) + documentSource.Source.Bytes = &encoded // Also sets Bytes + } else { + documentSource.Source.Bytes = &fileData + } + } + + return []BedrockContentBlock{ + { + Document: documentSource, + }, + }, nil case schemas.ChatContentBlockTypeInputAudio: // Bedrock doesn't support audio input in Converse API return nil, fmt.Errorf("audio input not supported in Bedrock Converse API") diff --git a/core/providers/gemini/responses.go b/core/providers/gemini/responses.go index f7862aeeb..e87c8e701 100644 --- a/core/providers/gemini/responses.go +++ b/core/providers/gemini/responses.go @@ -1588,7 +1588,12 @@ func convertGeminiInlineDataToContentBlock(blob *Blob) *schemas.ResponsesMessage Type: schemas.ResponsesInputMessageContentBlockTypeFile, ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{ FileData: &encodedData, - Filename: &blob.DisplayName, + FileType: func() *string { + if blob.MIMEType != "" { + return 
&blob.MIMEType + } + return nil + }(), }, } } @@ -1601,7 +1606,7 @@ func convertGeminiFileDataToContentBlock(fileData *FileData) *schemas.ResponsesM mimeType := fileData.MIMEType if mimeType == "" { - mimeType = "application/octet-stream" + mimeType = "application/pdf" } // Handle images @@ -1615,12 +1620,17 @@ func convertGeminiFileDataToContentBlock(fileData *FileData) *schemas.ResponsesM } // Handle other files - return &schemas.ResponsesMessageContentBlock{ + block := &schemas.ResponsesMessageContentBlock{ Type: schemas.ResponsesInputMessageContentBlockTypeFile, ResponsesInputMessageContentBlockFile: &schemas.ResponsesInputMessageContentBlockFile{ FileURL: &fileData.FileURI, }, } + + // Set FileType if available + block.ResponsesInputMessageContentBlockFile.FileType = &mimeType + + return block } func convertGeminiToolsToResponsesTools(tools []Tool) []schemas.ResponsesTool { @@ -2483,26 +2493,52 @@ func convertContentBlockToGeminiPart(block schemas.ResponsesMessageContentBlock) case schemas.ResponsesInputMessageContentBlockTypeFile: if block.ResponsesInputMessageContentBlockFile != nil { - if block.ResponsesInputMessageContentBlockFile.FileURL != nil { - return &Part{ + fileBlock := block.ResponsesInputMessageContentBlockFile + + // Handle FileURL (URI-based file) + if fileBlock.FileURL != nil { + mimeType := "application/pdf" + if fileBlock.FileType != nil { + mimeType = *fileBlock.FileType + } + + part := &Part{ FileData: &FileData{ - MIMEType: "application/octet-stream", // default - FileURI: *block.ResponsesInputMessageContentBlockFile.FileURL, + MIMEType: mimeType, + FileURI: *fileBlock.FileURL, }, - }, nil - } else if block.ResponsesInputMessageContentBlockFile.FileData != nil { - raw := *block.ResponsesInputMessageContentBlockFile.FileData - data := []byte(raw) - // FileData is base64-encoded - if decoded, err := base64.StdEncoding.DecodeString(raw); err == nil { - data = decoded } - return &Part{ - InlineData: &Blob{ - MIMEType: 
"application/octet-stream", // default - Data: data, - }, - }, nil + + if fileBlock.Filename != nil { + part.FileData.DisplayName = *fileBlock.Filename + } + + return part, nil + } + + // Handle FileData (inline file data) + if fileBlock.FileData != nil { + mimeType := "application/pdf" + if fileBlock.FileType != nil { + mimeType = *fileBlock.FileType + } + + // Convert file data to bytes using the helper function + dataBytes, extractedMimeType := convertFileDataToBytes(*fileBlock.FileData) + if extractedMimeType != "" { + mimeType = extractedMimeType + } + + if len(dataBytes) > 0 { + part := &Part{ + InlineData: &Blob{ + MIMEType: mimeType, + Data: dataBytes, + }, + } + + return part, nil + } } } } diff --git a/core/providers/gemini/utils.go b/core/providers/gemini/utils.go index 166fea03a..784b3fd2f 100644 --- a/core/providers/gemini/utils.go +++ b/core/providers/gemini/utils.go @@ -216,6 +216,40 @@ func isImageMimeType(mimeType string) bool { return false } +// convertFileDataToBytes converts file data (data URL or base64) to raw bytes for Gemini API. +// Returns the bytes and an extracted mime type (if found in data URL). 
+func convertFileDataToBytes(fileData string) ([]byte, string) { + var dataBytes []byte + var mimeType string + + // Check if it's a data URL (e.g., "data:application/pdf;base64,...") + if strings.HasPrefix(fileData, "data:") { + urlInfo := schemas.ExtractURLTypeInfo(fileData) + + if urlInfo.DataURLWithoutPrefix != nil { + // Decode the base64 content + decoded, err := base64.StdEncoding.DecodeString(*urlInfo.DataURLWithoutPrefix) + if err == nil { + dataBytes = decoded + if urlInfo.MediaType != nil { + mimeType = *urlInfo.MediaType + } + } + } + } else { + // Try to decode as plain base64 + decoded, err := base64.StdEncoding.DecodeString(fileData) + if err == nil { + dataBytes = decoded + } else { + // Not base64 - treat as plain text + dataBytes = []byte(fileData) + } + } + + return dataBytes, mimeType +} + var ( // Maps Gemini finish reasons to Bifrost format geminiFinishReasonToBifrost = map[FinishReason]string{ @@ -577,6 +611,42 @@ func convertBifrostMessagesToGemini(messages []schemas.ChatMessage) []Content { parts = append(parts, &Part{ Text: *block.Text, }) + } else if block.File != nil { + // Handle file blocks - use FileID if available (uploaded file) + if block.File.FileID != nil && *block.File.FileID != "" { + mimeType := "application/pdf" + if block.File.FileType != nil { + mimeType = *block.File.FileType + } + parts = append(parts, &Part{ + FileData: &FileData{ + FileURI: *block.File.FileID, + MIMEType: mimeType, + }, + }) + } else if block.File.FileData != nil { + // Inline file data - convert to InlineData (Blob) + fileData := *block.File.FileData + mimeType := "application/pdf" + if block.File.FileType != nil { + mimeType = *block.File.FileType + } + + // Convert file data to bytes for Gemini Blob + dataBytes, extractedMimeType := convertFileDataToBytes(fileData) + if extractedMimeType != "" { + mimeType = extractedMimeType + } + + if len(dataBytes) > 0 { + parts = append(parts, &Part{ + InlineData: &Blob{ + MIMEType: mimeType, + Data: dataBytes, + 
}, + }) + } + } } // Handle other content block types as needed } diff --git a/core/providers/openai/types.go b/core/providers/openai/types.go index 3915eafba..d00051fef 100644 --- a/core/providers/openai/types.go +++ b/core/providers/openai/types.go @@ -89,7 +89,7 @@ func (r *OpenAIChatRequest) MarshalJSON() ([]byte, error) { // First pass: check if we need to modify any messages needsCopy := false for _, msg := range r.Messages { - if hasCacheControlInChatMessage(msg) { + if hasFieldsToStripInChatMessage(msg) { needsCopy = true break } @@ -100,7 +100,7 @@ func (r *OpenAIChatRequest) MarshalJSON() ([]byte, error) { if needsCopy { processedMessages = make([]OpenAIMessage, len(r.Messages)) for i, msg := range r.Messages { - if !hasCacheControlInChatMessage(msg) { + if !hasFieldsToStripInChatMessage(msg) { // No modification needed, use original processedMessages[i] = msg continue @@ -109,14 +109,21 @@ func (r *OpenAIChatRequest) MarshalJSON() ([]byte, error) { // Copy message processedMessages[i] = msg - // Strip CacheControl from content blocks if needed + // Strip CacheControl and FileType from content blocks if needed if msg.Content != nil && msg.Content.ContentBlocks != nil { contentCopy := *msg.Content contentCopy.ContentBlocks = make([]schemas.ChatContentBlock, len(msg.Content.ContentBlocks)) for j, block := range msg.Content.ContentBlocks { - if block.CacheControl != nil { + needsBlockCopy := block.CacheControl != nil || (block.File != nil && block.File.FileType != nil) + if needsBlockCopy { blockCopy := block blockCopy.CacheControl = nil + // Strip FileType from file block + if blockCopy.File != nil && blockCopy.File.FileType != nil { + fileCopy := *blockCopy.File + fileCopy.FileType = nil + blockCopy.File = &fileCopy + } contentCopy.ContentBlocks[j] = blockCopy } else { contentCopy.ContentBlocks[j] = block @@ -258,7 +265,7 @@ func (r *OpenAIResponsesRequestInput) MarshalJSON() ([]byte, error) { // First pass: check if we need to modify anything needsCopy := 
false for _, msg := range r.OpenAIResponsesRequestInputArray { - if hasCacheControl(msg) { + if hasFieldsToStripInResponsesMessage(msg) { needsCopy = true break } @@ -272,7 +279,7 @@ func (r *OpenAIResponsesRequestInput) MarshalJSON() ([]byte, error) { // Only copy messages that have CacheControl messagesCopy := make([]schemas.ResponsesMessage, len(r.OpenAIResponsesRequestInputArray)) for i, msg := range r.OpenAIResponsesRequestInputArray { - if !hasCacheControl(msg) { + if !hasFieldsToStripInResponsesMessage(msg) { // No modification needed, use original messagesCopy[i] = msg continue @@ -281,45 +288,59 @@ func (r *OpenAIResponsesRequestInput) MarshalJSON() ([]byte, error) { // Copy only this message messagesCopy[i] = msg - // Strip CacheControl from content blocks if needed + // Strip CacheControl and FileType from content blocks if needed if msg.Content != nil && msg.Content.ContentBlocks != nil { contentCopy := *msg.Content contentCopy.ContentBlocks = make([]schemas.ResponsesMessageContentBlock, len(msg.Content.ContentBlocks)) - hasContentCache := false + hasContentModification := false for j, block := range msg.Content.ContentBlocks { - if block.CacheControl != nil { - hasContentCache = true + needsBlockCopy := block.CacheControl != nil || (block.ResponsesInputMessageContentBlockFile != nil && block.ResponsesInputMessageContentBlockFile.FileType != nil) + if needsBlockCopy { + hasContentModification = true blockCopy := block blockCopy.CacheControl = nil + // Strip FileType from file block + if blockCopy.ResponsesInputMessageContentBlockFile != nil && blockCopy.ResponsesInputMessageContentBlockFile.FileType != nil { + fileCopy := *blockCopy.ResponsesInputMessageContentBlockFile + fileCopy.FileType = nil + blockCopy.ResponsesInputMessageContentBlockFile = &fileCopy + } contentCopy.ContentBlocks[j] = blockCopy } else { contentCopy.ContentBlocks[j] = block } } - if hasContentCache { + if hasContentModification { messagesCopy[i].Content = &contentCopy } } - // 
Strip CacheControl from tool message output blocks if needed + // Strip CacheControl and FileType from tool message output blocks if needed if msg.ResponsesToolMessage != nil && msg.ResponsesToolMessage.Output != nil { if msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil { - hasToolCache := false + hasToolModification := false for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks { - if block.CacheControl != nil { - hasToolCache = true + if block.CacheControl != nil || (block.ResponsesInputMessageContentBlockFile != nil && block.ResponsesInputMessageContentBlockFile.FileType != nil) { + hasToolModification = true break } } - if hasToolCache { + if hasToolModification { toolMsgCopy := *msg.ResponsesToolMessage outputCopy := *msg.ResponsesToolMessage.Output outputCopy.ResponsesFunctionToolCallOutputBlocks = make([]schemas.ResponsesMessageContentBlock, len(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks)) for j, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks { - if block.CacheControl != nil { + needsBlockCopy := block.CacheControl != nil || (block.ResponsesInputMessageContentBlockFile != nil && block.ResponsesInputMessageContentBlockFile.FileType != nil) + if needsBlockCopy { blockCopy := block blockCopy.CacheControl = nil + // Strip FileType from file block + if blockCopy.ResponsesInputMessageContentBlockFile != nil && blockCopy.ResponsesInputMessageContentBlockFile.FileType != nil { + fileCopy := *blockCopy.ResponsesInputMessageContentBlockFile + fileCopy.FileType = nil + blockCopy.ResponsesInputMessageContentBlockFile = &fileCopy + } outputCopy.ResponsesFunctionToolCallOutputBlocks[j] = blockCopy } else { outputCopy.ResponsesFunctionToolCallOutputBlocks[j] = block @@ -336,25 +357,31 @@ func (r *OpenAIResponsesRequestInput) MarshalJSON() ([]byte, error) { return sonic.Marshal(nil) } -// Helper function to check if a chat message has any 
CacheControl fields -func hasCacheControlInChatMessage(msg OpenAIMessage) bool { +// Helper function to check if a chat message has any CacheControl fields or FileType in file blocks +func hasFieldsToStripInChatMessage(msg OpenAIMessage) bool { if msg.Content != nil && msg.Content.ContentBlocks != nil { for _, block := range msg.Content.ContentBlocks { if block.CacheControl != nil { return true } + if block.File != nil && block.File.FileType != nil { + return true + } } } return false } -// Helper function to check if a responses message has any CacheControl fields -func hasCacheControl(msg schemas.ResponsesMessage) bool { +// Helper function to check if a responses message has any CacheControl fields or FileType in file blocks +func hasFieldsToStripInResponsesMessage(msg schemas.ResponsesMessage) bool { if msg.Content != nil && msg.Content.ContentBlocks != nil { for _, block := range msg.Content.ContentBlocks { if block.CacheControl != nil { return true } + if block.ResponsesInputMessageContentBlockFile != nil && block.ResponsesInputMessageContentBlockFile.FileType != nil { + return true + } } } if msg.ResponsesToolMessage != nil && msg.ResponsesToolMessage.Output != nil { @@ -363,6 +390,9 @@ func hasCacheControl(msg schemas.ResponsesMessage) bool { if block.CacheControl != nil { return true } + if block.ResponsesInputMessageContentBlockFile != nil && block.ResponsesInputMessageContentBlockFile.FileType != nil { + return true + } } } } diff --git a/core/schemas/chatcompletions.go b/core/schemas/chatcompletions.go index d795e2d4b..bd101bd26 100644 --- a/core/schemas/chatcompletions.go +++ b/core/schemas/chatcompletions.go @@ -600,7 +600,7 @@ const ( ChatContentBlockTypeText ChatContentBlockType = "text" ChatContentBlockTypeImage ChatContentBlockType = "image_url" ChatContentBlockTypeInputAudio ChatContentBlockType = "input_audio" - ChatContentBlockTypeFile ChatContentBlockType = "input_file" + ChatContentBlockTypeFile ChatContentBlockType = "file" 
ChatContentBlockTypeRefusal ChatContentBlockType = "refusal" ) @@ -647,6 +647,7 @@ type ChatInputFile struct { FileData *string `json:"file_data,omitempty"` // Base64 encoded file data FileID *string `json:"file_id,omitempty"` // Reference to uploaded file Filename *string `json:"filename,omitempty"` // Name of the file + FileType *string `json:"file_type,omitempty"` // Type of the file } // ChatToolMessage represents a tool message in a chat conversation. diff --git a/core/schemas/mux.go b/core/schemas/mux.go index ccd058b17..de8392900 100644 --- a/core/schemas/mux.go +++ b/core/schemas/mux.go @@ -575,7 +575,7 @@ func ToChatMessages(rms []ResponsesMessage) []ChatMessage { case ResponsesInputMessageContentBlockTypeImage: chatBlockType = ChatContentBlockTypeImage // "input_image" -> "image_url" case ResponsesInputMessageContentBlockTypeFile: - chatBlockType = ChatContentBlockTypeFile // "input_file" -> "input_file" (same) + chatBlockType = ChatContentBlockTypeFile // "input_file" -> "file" case ResponsesInputMessageContentBlockTypeAudio: chatBlockType = ChatContentBlockTypeInputAudio // "input_audio" -> "input_audio" (same) default: diff --git a/core/schemas/responses.go b/core/schemas/responses.go index f50570b5f..0811c42f6 100644 --- a/core/schemas/responses.go +++ b/core/schemas/responses.go @@ -426,9 +426,10 @@ type ResponsesInputMessageContentBlockImage struct { } type ResponsesInputMessageContentBlockFile struct { - FileData *string `json:"file_data,omitempty"` // Base64 encoded file data + FileData *string `json:"file_data,omitempty"` // Base64 encoded file data or plain text FileURL *string `json:"file_url,omitempty"` // Direct URL to file Filename *string `json:"filename,omitempty"` // Name of the file + FileType *string `json:"file_type,omitempty"` // MIME type (e.g., "application/pdf", "text/plain") } type ResponsesInputMessageContentBlockAudio struct { diff --git a/core/version b/core/version index f3014908e..bb3653fe5 100644 --- a/core/version +++ 
b/core/version @@ -1 +1 @@ -1.2.42 \ No newline at end of file +1.2.43 \ No newline at end of file diff --git a/tests/integrations/config.yml b/tests/integrations/config.yml index c58c1b69b..cb8e858ae 100644 --- a/tests/integrations/config.yml +++ b/tests/integrations/config.yml @@ -34,8 +34,9 @@ api: # Integrations (openai, anthropic, google, litellm, langchain) map to these providers providers: openai: - chat: "gpt-4o-mini" + chat: "gpt-4o" vision: "gpt-4o" + file: "gpt-5" tools: "gpt-4o-mini" speech: "tts-1" transcription: "whisper-1" @@ -62,6 +63,7 @@ providers: anthropic: chat: "claude-sonnet-4-5-20250929" vision: "claude-3-7-sonnet-20250219" + file: "claude-sonnet-4-5-20250929" tools: "claude-sonnet-4-5-20250929" streaming: "claude-sonnet-4-5-20250929" thinking: "claude-opus-4-5" @@ -85,6 +87,7 @@ providers: chat: "gemini-2.5-flash" vision: "gemini-2.5-flash" tools: "gemini-2.5-flash" + file: "gemini-2.5-flash" thinking: "gemini-3-pro-preview" speech: "gemini-2.5-flash-preview-tts" transcription: "gemini-2.5-flash" @@ -112,6 +115,7 @@ providers: bedrock: chat: "global.anthropic.claude-sonnet-4-20250514-v1:0" vision: "global.anthropic.claude-sonnet-4-20250514-v1:0" + file: "global.anthropic.claude-sonnet-4-20250514-v1:0" tools: "global.anthropic.claude-sonnet-4-20250514-v1:0" streaming: "global.anthropic.claude-sonnet-4-20250514-v1:0" thinking: "us.anthropic.claude-opus-4-5-20251101-v1:0" @@ -161,6 +165,7 @@ provider_scenarios: automatic_function_calling: true image_url: true image_base64: true + file_input: true multiple_images: true speech_synthesis: true speech_synthesis_streaming: true @@ -200,6 +205,8 @@ provider_scenarios: automatic_function_calling: true image_url: true image_base64: true + file_input: true + file_input_text: true multiple_images: true speech_synthesis: false speech_synthesis_streaming: false @@ -239,6 +246,7 @@ provider_scenarios: automatic_function_calling: true image_url: false # Gemini requires base64 or file upload image_base64: 
true + file_input: true multiple_images: false speech_synthesis: true speech_synthesis_streaming: true @@ -278,6 +286,8 @@ provider_scenarios: automatic_function_calling: true image_url: false image_base64: true + file_input: true + file_input_text: true multiple_images: false speech_synthesis: false speech_synthesis_streaming: false @@ -360,6 +370,8 @@ scenario_capabilities: automatic_function_calling: "tools" image_url: "vision" image_base64: "vision" + file_input: "file" + file_input_text: "file" multiple_images: "vision" speech_synthesis: "speech" speech_synthesis_streaming: "speech" diff --git a/tests/integrations/tests/test_anthropic.py b/tests/integrations/tests/test_anthropic.py index ad0ade037..ba0151efa 100644 --- a/tests/integrations/tests/test_anthropic.py +++ b/tests/integrations/tests/test_anthropic.py @@ -56,6 +56,7 @@ CALCULATOR_TOOL, COMPARISON_KEYWORDS, IMAGE_URL, + FILE_DATA_BASE64, INPUT_TOKENS_LONG_TEXT, INPUT_TOKENS_SIMPLE_TEXT, INPUT_TOKENS_WITH_SYSTEM, @@ -1671,6 +1672,103 @@ def test_31c_input_tokens_long_text(self, anthropic_client, test_config, provide f"Long text should have >100 tokens, got {response.input_tokens}" ) + @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("file_input")) + def test_31_document_pdf_input(self, anthropic_client, test_config, provider, model): + """Test Case 31: PDF document input""" + if provider == "_no_providers_" or model == "_no_model_": + pytest.skip("No providers configured for document_input scenario") + + messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is the main content of this PDF document? Summarize it." 
+ }, + { + "type": "document", + "title": "testing", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": FILE_DATA_BASE64 + } + } + ] + } + ] + + response = anthropic_client.messages.create( + model=format_provider_model(provider, model), + messages=messages, + max_tokens=500 + ) + + assert_valid_chat_response(response) + assert len(response.content) > 0 + assert response.content[0].type == "text" + content = response.content[0].text.lower() + + # Should mention "hello world" from the PDF + assert any(word in content for word in ["hello", "world"]), \ + f"Response should reference document content. Got: {content}" + + @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("file_input_text")) + def test_32_document_text_input(self, anthropic_client, test_config, provider, model): + """Test Case 32: Text document input""" + if provider == "_no_providers_" or model == "_no_model_": + pytest.skip("No providers configured for document_input scenario") + + # Plain text document content + text_content = """This is a test text document for document input testing. + +It contains multiple paragraphs to ensure the model can properly process text documents. + +Key features of this document: +1. Multiple lines and structure +2. Clear formatting +3. Numbered list + +This document is used to verify that the AI can read and understand text document inputs.""" + + messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key features mentioned in this document?" 
+ }, + { + "type": "document", + "title": "testing", + "source": { + "type": "text", + "media_type": "text/plain", + "data": text_content + } + } + ] + } + ] + + response = anthropic_client.messages.create( + model=format_provider_model(provider, model), + messages=messages, + max_tokens=500 + ) + + assert_valid_chat_response(response) + assert len(response.content) > 0 + assert response.content[0].type == "text" + content = response.content[0].text.lower() + + # Should reference the document features + document_keywords = ["feature", "line", "format", "list", "document"] + assert any(word in content for word in document_keywords), \ + f"Response should reference document features. Got: {content}" + # Additional helper functions specific to Anthropic def serialize_anthropic_content(content_blocks: List[Any]) -> List[Dict[str, Any]]: diff --git a/tests/integrations/tests/test_openai.py b/tests/integrations/tests/test_openai.py index a5d62fb7b..f264ce002 100644 --- a/tests/integrations/tests/test_openai.py +++ b/tests/integrations/tests/test_openai.py @@ -73,6 +73,7 @@ from .utils.common import ( CALCULATOR_TOOL, + FILE_DATA_BASE64, COMPARISON_KEYWORDS, COMPLEX_E2E_MESSAGES, EMBEDDINGS_DIFFERENT_TEXTS, @@ -1133,6 +1134,46 @@ def test_31_list_models(self, openai_client, test_config): assert response.data is not None assert len(response.data) > 0 + @pytest.mark.parametrize( + "provider,model", get_cross_provider_params_for_scenario("file_input") + ) + def test_chat_completion_with_file(self, openai_client, test_config, provider, model): + """Test chat completion with PDF file input""" + if provider == "_no_providers_" or model == "_no_model_": + pytest.skip("No providers configured for this scenario") + + response = openai_client.chat.completions.create( + model=format_provider_model(provider, model), + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is the main topic of this document? 
Summarize the key concepts.", + }, + { + "type": "file", + "file": { + "file_data": f"data:application/pdf;base64,{FILE_DATA_BASE64}", + "filename": "testingpdf", + }, + }, + ], + } + ], + max_tokens=400, + ) + + assert_valid_chat_response(response) + content = get_content_string(response.choices[0].message.content) + content_lower = content.lower() + + # Should mention the PDF's text or refer to the file itself + keywords = ["hello", "world", "testing", "pdf", "file"] + assert any( + keyword in content_lower for keyword in keywords + ), f"Response should describe the document content. Got: {content}" # ========================================================================= # RESPONSES API TEST CASES @@ -1152,7 +1193,7 @@ def test_32_responses_simple_text(self, openai_client, test_config, provider, mo # Validate response structure assert_valid_responses_response(response, min_content_length=20) - # Check that we have meaningful content about space + # Check that we have meaningful content content = "" for message in response.output: if hasattr(message, "content") and message.content: @@ -1164,10 +1205,10 @@ def test_32_responses_simple_text(self, openai_client, test_config, provider, mo content += block.text content_lower = content.lower() - space_keywords = ["space", "rocket", "astronaut", "moon", "mars", "nasa", "satellite"] + keywords = ["hello", "world", "testing", "pdf", "file"] assert any( - keyword in content_lower for keyword in space_keywords - ), f"Response should contain space-related content. Got: {content}" + keyword in content_lower for keyword in keywords + ), f"Response should describe the document content. Got: {content}" # Verify usage information if hasattr(response, "usage"): @@ -1251,6 +1292,56 @@ def test_34_responses_with_image(self, openai_client, test_config, provider, mod keyword in content_lower for keyword in image_keywords ), f"Response should describe the image. 
Got: {content}" + @pytest.mark.parametrize( + "provider,model", get_cross_provider_params_for_scenario("file_input") + ) + def test_responses_with_file(self, openai_client, test_config, provider, model): + """Test Responses API with base64-encoded PDF file""" + if provider == "_no_providers_" or model == "_no_model_": + pytest.skip("No providers configured for this scenario") + + response = openai_client.responses.create( + model=format_provider_model(provider, model), + input=[ + { + "role": "user", + "content": [ + { + "type": "input_text", + "text": "What is the main topic of this document? Summarize the key concepts.", + }, + { + "type": "input_file", + "filename": "testingpdf", + "file_data": f"data:application/pdf;base64,{FILE_DATA_BASE64}", + }, + ], + } + ], + max_output_tokens=400, + ) + + # Validate response structure + assert_valid_responses_response(response, min_content_length=30) + + # Extract content + content = "" + for message in response.output: + if hasattr(message, "content") and message.content: + if isinstance(message.content, str): + content += message.content + elif isinstance(message.content, list): + for block in message.content: + if hasattr(block, "text") and block.text: + content += block.text + + # Check for keywords from the test PDF or references to the file + content_lower = content.lower() + keywords = ["hello", "world", "testing", "pdf", "file"] + assert any( + keyword in content_lower for keyword in keywords + ), f"Response should describe the recipe document. 
Got: {content}" + @pytest.mark.parametrize("provider,model", get_cross_provider_params_for_scenario("responses")) def test_35_responses_with_tools(self, openai_client, test_config, provider, model): if provider == "_no_providers_" or model == "_no_model_": diff --git a/tests/integrations/tests/utils/common.py b/tests/integrations/tests/utils/common.py index 09b22b1ad..c50e0bf72 100644 --- a/tests/integrations/tests/utils/common.py +++ b/tests/integrations/tests/utils/common.py @@ -26,6 +26,21 @@ class Config: IMAGE_URL = "https://pub-cdead89c2f004d8f963fd34010c479d0.r2.dev/Gfp-wisconsin-madison-the-nature-boardwalk.jpg" IMAGE_URL_SECONDARY = "https://goo.gle/instrument-img" +FILE_DATA_BASE64 = ( + "JVBERi0xLjcKCjEgMCBvYmogICUgZW50cnkgcG9pbnQKPDwKICAvVHlwZSAvQ2F0YWxvZwogIC" + "9QYWdlcyAyIDAgUgo+PgplbmRvYmoKCjIgMCBvYmoKPDwKICAvVHlwZSAvUGFnZXwKICAvTWV" + "kaWFCb3ggWyAwIDAgMjAwIDIwMCBdCiAgL0NvdW50IDEKICAvS2lkcyBbIDMgMCBSIF0KPj4K" + "ZW5kb2JqCgozIDAgb2JqCjw8CiAgL1R5cGUgL1BhZ2UKICAvUGFyZW50IDIgMCBSCiAgL1Jlc" + "291cmNlcyA8PAogICAgL0ZvbnQgPDwKICAgICAgL0YxIDQgMCBSCj4+CiAgPj4KICAvQ29udG" + "VudHMgNSAwIFIKPj4KZW5kb2JqCgo0IDAgb2JqCjw8CiAgL1R5cGUgL0ZvbnQKICAvU3VidHl" + "wZSAvVHlwZTEKICAvQmFzZUZvbnQgL1RpbWVzLVJvbWFuCj4+CmVuZG9iagoKNSAwIG9iago8" + "PAogIC9MZW5ndGggNDQKPj4Kc3RyZWFtCkJUCjcwIDUwIFRECi9GMSAxMiBUZgooSGVsbG8gV" + "29ybGQhKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCgp4cmVmCjAgNgowMDAwMDAwMDAwIDY1NT" + "M1IGYgCjAwMDAwMDAwMTAgMDAwMDAgbiAKMDAwMDAwMDA2MCAwMDAwMCBuIAowMDAwMDAwMTU" + "3IDAwMDAwIG4gCjAwMDAwMDAyNTUgMDAwMDAgbiAKMDAwMDAwMDM1MyAwMDAwMCBuIAp0cmFp" + "bGVyCjw8CiAgL1NpemUgNgogIC9Sb290IDEgMCBSCj4+CnN0YXJ0eHJlZgo0NDkKJSVFT0YK" + ) + # Small test image as base64 (1x1 pixel red PNG) BASE64_IMAGE = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==" diff --git a/transports/changelog.md b/transports/changelog.md index e69de29bb..49c69d7c9 100644 --- a/transports/changelog.md +++ b/transports/changelog.md @@ -0,0 +1 @@ +- feat: add 
document/file support for Anthropic, Bedrock, and Gemini \ No newline at end of file