Skip to content

Commit cccf60c

Browse files
feat: responses reasoning fixes
1 parent b5df7bd commit cccf60c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

61 files changed

+9179
-4849
lines changed

core/internal/testutil/account.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx *context.Context
139139
"claude-3.7-sonnet": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
140140
"claude-4-sonnet": "global.anthropic.claude-sonnet-4-20250514-v1:0",
141141
"claude-4.5-sonnet": "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
142+
"claude-4.5-haiku": "global.anthropic.claude-haiku-4-5-20251001-v1:0",
142143
},
143144
},
144145
},

core/internal/testutil/chat_completion_stream.go

Lines changed: 351 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,4 +355,355 @@ func RunChatCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx cont
355355
t.Logf("✅ Streaming with tools test completed successfully")
356356
})
357357
}
358+
359+
// Test chat completion streaming with reasoning if supported
360+
if testConfig.Scenarios.Reasoning && testConfig.ReasoningModel != "" {
361+
t.Run("ChatCompletionStreamWithReasoning", func(t *testing.T) {
362+
if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
363+
t.Parallel()
364+
}
365+
366+
problemPrompt := "Solve this step by step: If a train leaves station A at 2 PM traveling at 60 mph, and another train leaves station B at 3 PM traveling at 80 mph toward station A, and the stations are 420 miles apart, when will they meet?"
367+
368+
messages := []schemas.ChatMessage{
369+
CreateBasicChatMessage(problemPrompt),
370+
}
371+
372+
request := &schemas.BifrostChatRequest{
373+
Provider: testConfig.Provider,
374+
Model: testConfig.ReasoningModel,
375+
Input: messages,
376+
Params: &schemas.ChatParameters{
377+
MaxCompletionTokens: bifrost.Ptr(1800),
378+
Reasoning: &schemas.ChatReasoning{
379+
Effort: bifrost.Ptr("high"),
380+
MaxTokens: bifrost.Ptr(1500),
381+
},
382+
},
383+
Fallbacks: testConfig.Fallbacks,
384+
}
385+
386+
// Use retry framework for stream requests with reasoning
387+
retryConfig := StreamingRetryConfig()
388+
retryContext := TestRetryContext{
389+
ScenarioName: "ChatCompletionStreamWithReasoning",
390+
ExpectedBehavior: map[string]interface{}{
391+
"should_stream_reasoning": true,
392+
"should_have_reasoning_events": true,
393+
"problem_type": "mathematical",
394+
},
395+
TestMetadata: map[string]interface{}{
396+
"provider": testConfig.Provider,
397+
"model": testConfig.ReasoningModel,
398+
"reasoning": true,
399+
},
400+
}
401+
402+
// Use proper streaming retry wrapper for the stream request
403+
responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStream, *schemas.BifrostError) {
404+
return client.ChatCompletionStreamRequest(ctx, request)
405+
})
406+
407+
RequireNoError(t, err, "Chat completion stream with reasoning failed")
408+
if responseChannel == nil {
409+
t.Fatal("Response channel should not be nil")
410+
}
411+
412+
var reasoningDetected bool
413+
var reasoningDetailsDetected bool
414+
var reasoningTokensDetected bool
415+
var responseCount int
416+
417+
streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second)
418+
defer cancel()
419+
420+
t.Logf("🧠 Testing chat completion streaming with reasoning...")
421+
422+
for {
423+
select {
424+
case response, ok := <-responseChannel:
425+
if !ok {
426+
goto reasoningStreamComplete
427+
}
428+
429+
if response == nil {
430+
t.Fatal("Streaming response should not be nil")
431+
}
432+
responseCount++
433+
434+
if response.BifrostChatResponse != nil {
435+
chatResp := response.BifrostChatResponse
436+
437+
// Check for reasoning in choices
438+
if len(chatResp.Choices) > 0 {
439+
for _, choice := range chatResp.Choices {
440+
if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil {
441+
delta := choice.ChatStreamResponseChoice.Delta
442+
443+
// Check for reasoning content in delta
444+
if delta.Reasoning != nil && *delta.Reasoning != "" {
445+
reasoningDetected = true
446+
t.Logf("🧠 Reasoning content detected: %q", *delta.Reasoning)
447+
}
448+
449+
// Check for reasoning details in delta
450+
if len(delta.ReasoningDetails) > 0 {
451+
reasoningDetailsDetected = true
452+
t.Logf("🧠 Reasoning details detected: %d entries", len(delta.ReasoningDetails))
453+
454+
for _, detail := range delta.ReasoningDetails {
455+
t.Logf(" - Type: %s, Index: %d", detail.Type, detail.Index)
456+
switch detail.Type {
457+
case schemas.BifrostReasoningDetailsTypeText:
458+
if detail.Text != nil && *detail.Text != "" {
459+
maxLen := 100
460+
text := *detail.Text
461+
if len(text) < maxLen {
462+
maxLen = len(text)
463+
}
464+
t.Logf(" Text preview: %q", text[:maxLen])
465+
}
466+
case schemas.BifrostReasoningDetailsTypeSummary:
467+
if detail.Summary != nil {
468+
t.Logf(" Summary length: %d", len(*detail.Summary))
469+
}
470+
case schemas.BifrostReasoningDetailsTypeEncrypted:
471+
if detail.Data != nil {
472+
t.Logf(" Encrypted data length: %d", len(*detail.Data))
473+
}
474+
}
475+
}
476+
}
477+
}
478+
}
479+
}
480+
481+
// Check for reasoning tokens in usage (usually in final chunk)
482+
if chatResp.Usage != nil && chatResp.Usage.CompletionTokensDetails != nil {
483+
if chatResp.Usage.CompletionTokensDetails.ReasoningTokens > 0 {
484+
reasoningTokensDetected = true
485+
t.Logf("🔢 Reasoning tokens used: %d", chatResp.Usage.CompletionTokensDetails.ReasoningTokens)
486+
}
487+
}
488+
}
489+
490+
if responseCount > 150 {
491+
goto reasoningStreamComplete
492+
}
493+
494+
case <-streamCtx.Done():
495+
t.Fatal("Timeout waiting for chat completion streaming response with reasoning")
496+
}
497+
}
498+
499+
reasoningStreamComplete:
500+
if responseCount == 0 {
501+
t.Fatal("Should receive at least one streaming response")
502+
}
503+
504+
// At least one of these should be detected for reasoning
505+
if !reasoningDetected && !reasoningDetailsDetected && !reasoningTokensDetected {
506+
t.Logf("⚠️ Warning: No explicit reasoning indicators found in streaming response")
507+
} else {
508+
t.Logf("✅ Reasoning indicators detected:")
509+
if reasoningDetected {
510+
t.Logf(" - Reasoning content found")
511+
}
512+
if reasoningDetailsDetected {
513+
t.Logf(" - Reasoning details found")
514+
}
515+
if reasoningTokensDetected {
516+
t.Logf(" - Reasoning tokens reported")
517+
}
518+
}
519+
520+
t.Logf("✅ Chat completion streaming with reasoning test completed successfully")
521+
})
522+
523+
// Additional test with full validation and retry support
524+
t.Run("ChatCompletionStreamWithReasoningValidated", func(t *testing.T) {
525+
if os.Getenv("SKIP_PARALLEL_TESTS") != "true" {
526+
t.Parallel()
527+
}
528+
529+
if testConfig.Provider == schemas.OpenAI || testConfig.Provider == schemas.Groq {
530+
// OpenAI and Groq because reasoning for them in stream is extremely flaky
531+
t.Skip("Skipping ChatCompletionStreamWithReasoningValidated test for OpenAI and Groq")
532+
return
533+
}
534+
535+
problemPrompt := "A farmer has 100 chickens and 50 cows. Each chicken lays 5 eggs per week, and each cow produces 20 liters of milk per day. If the farmer sells eggs for $0.25 each and milk for $1.50 per liter, and it costs $2 per week to feed each chicken and $15 per week to feed each cow, what is the farmer's weekly profit?"
536+
if testConfig.Provider == schemas.Cerebras {
537+
problemPrompt = "Hello how are you, can you search hackernews news regarding maxim ai for me? use your tools for this"
538+
}
539+
540+
messages := []schemas.ChatMessage{
541+
CreateBasicChatMessage(problemPrompt),
542+
}
543+
544+
request := &schemas.BifrostChatRequest{
545+
Provider: testConfig.Provider,
546+
Model: testConfig.ReasoningModel,
547+
Input: messages,
548+
Params: &schemas.ChatParameters{
549+
MaxCompletionTokens: bifrost.Ptr(1800),
550+
Reasoning: &schemas.ChatReasoning{
551+
Effort: bifrost.Ptr("high"),
552+
MaxTokens: bifrost.Ptr(1500),
553+
},
554+
},
555+
Fallbacks: testConfig.Fallbacks,
556+
}
557+
558+
// Use retry framework for stream requests with reasoning and validation
559+
retryConfig := StreamingRetryConfig()
560+
retryContext := TestRetryContext{
561+
ScenarioName: "ChatCompletionStreamWithReasoningValidated",
562+
ExpectedBehavior: map[string]interface{}{
563+
"should_stream_reasoning": true,
564+
"should_have_reasoning_indicators": true,
565+
"problem_type": "mathematical",
566+
},
567+
TestMetadata: map[string]interface{}{
568+
"provider": testConfig.Provider,
569+
"model": testConfig.ReasoningModel,
570+
"reasoning": true,
571+
"validated": true,
572+
},
573+
}
574+
575+
// Use validation retry wrapper that includes stream reading and validation
576+
validationResult := WithChatStreamValidationRetry(
577+
t,
578+
retryConfig,
579+
retryContext,
580+
func() (chan *schemas.BifrostStream, *schemas.BifrostError) {
581+
return client.ChatCompletionStreamRequest(ctx, request)
582+
},
583+
func(responseChannel chan *schemas.BifrostStream) ChatStreamValidationResult {
584+
var reasoningDetected bool
585+
var reasoningDetailsDetected bool
586+
var reasoningTokensDetected bool
587+
var responseCount int
588+
var streamErrors []string
589+
var fullContent strings.Builder
590+
591+
streamCtx, cancel := context.WithTimeout(ctx, 200*time.Second)
592+
defer cancel()
593+
594+
t.Logf("🧠 Testing validated chat completion streaming with reasoning...")
595+
596+
for {
597+
select {
598+
case response, ok := <-responseChannel:
599+
if !ok {
600+
goto validatedReasoningStreamComplete
601+
}
602+
603+
if response == nil {
604+
streamErrors = append(streamErrors, "❌ Streaming response should not be nil")
605+
continue
606+
}
607+
responseCount++
608+
609+
if response.BifrostChatResponse != nil {
610+
chatResp := response.BifrostChatResponse
611+
612+
// Check for reasoning in choices
613+
if len(chatResp.Choices) > 0 {
614+
for _, choice := range chatResp.Choices {
615+
if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil {
616+
delta := choice.ChatStreamResponseChoice.Delta
617+
618+
// Accumulate content
619+
if delta.Content != nil {
620+
fullContent.WriteString(*delta.Content)
621+
t.Logf("📝 Content chunk received (length: %d, total so far: %d)", len(*delta.Content), fullContent.Len())
622+
}
623+
624+
// Check for reasoning content in delta
625+
if delta.Reasoning != nil && *delta.Reasoning != "" {
626+
reasoningDetected = true
627+
t.Logf("🧠 Reasoning content detected (length: %d)", len(*delta.Reasoning))
628+
}
629+
630+
// Check for reasoning details in delta
631+
if len(delta.ReasoningDetails) > 0 {
632+
reasoningDetailsDetected = true
633+
t.Logf("🧠 Reasoning details detected: %d entries", len(delta.ReasoningDetails))
634+
}
635+
}
636+
}
637+
}
638+
639+
// Check for reasoning tokens in usage
640+
if chatResp.Usage != nil && chatResp.Usage.CompletionTokensDetails != nil {
641+
if chatResp.Usage.CompletionTokensDetails.ReasoningTokens > 0 {
642+
reasoningTokensDetected = true
643+
t.Logf("🔢 Reasoning tokens: %d", chatResp.Usage.CompletionTokensDetails.ReasoningTokens)
644+
}
645+
}
646+
}
647+
648+
if responseCount > 150 {
649+
goto validatedReasoningStreamComplete
650+
}
651+
652+
case <-streamCtx.Done():
653+
streamErrors = append(streamErrors, "❌ Timeout waiting for streaming response with reasoning")
654+
goto validatedReasoningStreamComplete
655+
}
656+
}
657+
658+
validatedReasoningStreamComplete:
659+
var errors []string
660+
if responseCount == 0 {
661+
errors = append(errors, "❌ Should receive at least one streaming response")
662+
}
663+
664+
// Check if at least one reasoning indicator is present
665+
hasAnyReasoningIndicator := reasoningDetected || reasoningDetailsDetected || reasoningTokensDetected
666+
if !hasAnyReasoningIndicator {
667+
errors = append(errors, fmt.Sprintf("❌ No reasoning indicators found in streaming response (received %d chunks)", responseCount))
668+
}
669+
670+
// Check content - for reasoning models, content may come after reasoning or may not be present
671+
// If reasoning is detected, we consider it a valid response even without content
672+
content := strings.TrimSpace(fullContent.String())
673+
if content == "" && !hasAnyReasoningIndicator {
674+
// Only require content if no reasoning indicators were found
675+
errors = append(errors, "❌ No content received in streaming response and no reasoning indicators found")
676+
} else if content == "" && hasAnyReasoningIndicator {
677+
// Log a warning but don't fail if reasoning is present
678+
t.Logf("⚠️ Warning: Reasoning detected but no content chunks received (this may be expected for some reasoning models)")
679+
}
680+
681+
if len(streamErrors) > 0 {
682+
errors = append(errors, streamErrors...)
683+
}
684+
685+
return ChatStreamValidationResult{
686+
Passed: len(errors) == 0,
687+
Errors: errors,
688+
ReceivedData: responseCount > 0 && (content != "" || hasAnyReasoningIndicator),
689+
StreamErrors: streamErrors,
690+
ToolCallDetected: false, // Not testing tool calls here
691+
ResponseCount: responseCount,
692+
}
693+
},
694+
)
695+
696+
// Check validation result
697+
if !validationResult.Passed {
698+
allErrors := append(validationResult.Errors, validationResult.StreamErrors...)
699+
t.Fatalf("❌ Chat completion stream with reasoning validation failed after retries: %s", strings.Join(allErrors, "; "))
700+
}
701+
702+
if validationResult.ResponseCount == 0 {
703+
t.Fatalf("❌ Should receive at least one streaming response")
704+
}
705+
706+
t.Logf("✅ Validated chat completion streaming with reasoning test completed successfully")
707+
})
708+
}
358709
}

0 commit comments

Comments
 (0)