Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 22 additions & 16 deletions internal/runtime/executor/kiro_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,17 @@ type KiroExecutor struct {
// This is critical because OpenAI and Claude formats have different tool structures:
// - OpenAI: tools[].function.name, tools[].function.description
// - Claude: tools[].name, tools[].description
// headers parameter allows checking Anthropic-Beta header for thinking mode detection.
// Returns the serialized JSON payload and a boolean indicating whether thinking mode was injected.
func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format) ([]byte, bool) {
func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format, headers http.Header) ([]byte, bool) {
switch sourceFormat.String() {
case "openai":
log.Debugf("kiro: using OpenAI payload builder for source format: %s", sourceFormat.String())
return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly)
return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly, headers, nil)
default:
// Default to Claude format (also handles "claude", "kiro", etc.)
log.Debugf("kiro: using Claude payload builder for source format: %s", sourceFormat.String())
return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly)
return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly, headers, nil)
}
}

Expand Down Expand Up @@ -249,7 +250,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.

// Rebuild payload with the correct origin for this endpoint
// Each endpoint requires its matching Origin value in the request body
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)

log.Debugf("kiro: trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
Expand Down Expand Up @@ -359,7 +360,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
// Rebuild payload with new profile ARN if changed
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Infof("kiro: token refreshed successfully, retrying request")
continue
}
Expand Down Expand Up @@ -416,7 +417,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
if refreshedAuth != nil {
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Infof("kiro: token refreshed for 403, retrying request")
continue
}
Expand Down Expand Up @@ -555,10 +556,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox

// Rebuild payload with the correct origin for this endpoint
// Each endpoint requires its matching Origin value in the request body
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
// Kiro API always returns <thinking> tags regardless of whether thinking mode was requested
// So we always enable thinking parsing for Kiro responses
thinkingEnabled := true
kiroPayload, thinkingEnabled := buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)

log.Debugf("kiro: stream trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
Expand Down Expand Up @@ -681,7 +679,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
// Rebuild payload with new profile ARN if changed
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Infof("kiro: token refreshed successfully, retrying stream request")
continue
}
Expand Down Expand Up @@ -738,7 +736,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
if refreshedAuth != nil {
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Infof("kiro: token refreshed for 403, retrying stream request")
continue
}
Expand Down Expand Up @@ -1702,6 +1700,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
pendingEndTagChars := 0 // Number of chars that might be start of </thinking>
isThinkingBlockOpen := false // Track if thinking content block is open
thinkingBlockIndex := -1 // Index of the thinking content block
var accumulatedThinkingContent strings.Builder // Accumulate thinking content for signature generation

// Code block state tracking for heuristic thinking tag parsing
// When inside a markdown code block, <thinking> tags should NOT be parsed
Expand Down Expand Up @@ -1847,6 +1846,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
// Accumulate thinking content for signature generation
accumulatedThinkingContent.WriteString(pendingText)
} else {
// Output as regular text
if !isTextBlockOpen {
Expand Down Expand Up @@ -2390,21 +2391,24 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
// Accumulate thinking content for signature generation
accumulatedThinkingContent.WriteString(thinkContent)
}

// Note: Partial tag handling is done via pendingEndTagChars
// When the next chunk arrives, the partial tag will be reconstructed

// Close thinking block
if isThinkingBlockOpen {
blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(thinkingBlockIndex)
blockStop := kiroclaude.BuildClaudeThinkingBlockStopEvent(thinkingBlockIndex)
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
isThinkingBlockOpen = false
accumulatedThinkingContent.Reset() // Reset for potential next thinking block
}

inThinkBlock = false
Expand Down Expand Up @@ -2450,6 +2454,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
// Accumulate thinking content for signature generation
accumulatedThinkingContent.WriteString(contentToEmit)
}

remaining = ""
Expand Down Expand Up @@ -2592,6 +2598,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
// Handle tool uses in response (with deduplication)
for _, tu := range toolUses {
toolUseID := kirocommon.GetString(tu, "toolUseId")
toolName := kirocommon.GetString(tu, "name")

// Check for duplicate
if processedIDs[toolUseID] {
Expand All @@ -2615,7 +2622,6 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out

// Emit tool_use content block
contentBlockIndex++
toolName := kirocommon.GetString(tu, "name")

blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", toolUseID, toolName)
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
Expand Down Expand Up @@ -2888,7 +2894,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
if calculatedInputTokens > 0 {
localEstimate := totalUsage.InputTokens
totalUsage.InputTokens = calculatedInputTokens
log.Infof("kiro: using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)",
log.Debugf("kiro: using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)",
upstreamContextPercentage, calculatedInputTokens, localEstimate)
}
}
Expand All @@ -2897,7 +2903,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out

// Log upstream usage information if received
if hasUpstreamUsage {
log.Infof("kiro: upstream usage - credits: %.4f, context: %.2f%%, final tokens - input: %d, output: %d, total: %d",
log.Debugf("kiro: upstream usage - credits: %.4f, context: %.2f%%, final tokens - input: %d, output: %d, total: %d",
upstreamCreditUsage, upstreamContextPercentage,
totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens)
}
Expand Down
100 changes: 68 additions & 32 deletions internal/translator/kiro/claude/kiro_claude_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package claude
import (
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
"unicode/utf8"
Expand Down Expand Up @@ -33,6 +34,7 @@ type KiroInferenceConfig struct {
TopP float64 `json:"topP,omitempty"`
}


// KiroConversationState holds the conversation context
type KiroConversationState struct {
ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
Expand Down Expand Up @@ -134,9 +136,11 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
// headers parameter allows checking Anthropic-Beta header for thinking mode detection.
// metadata parameter is kept for API compatibility but no longer used for thinking configuration.
// Supports thinking mode - when enabled, injects thinking tags into system prompt.
// Returns the payload and a boolean indicating whether thinking mode was injected.
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) {
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, headers http.Header, metadata map[string]any) ([]byte, bool) {
// Extract max_tokens for potential use in inferenceConfig
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
const kiroMaxOutputTokens = 32000
Expand Down Expand Up @@ -181,26 +185,9 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
// Extract system prompt
systemPrompt := extractSystemPrompt(claudeBody)

// Check for thinking mode using the comprehensive IsThinkingEnabled function
// This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format
thinkingEnabled := IsThinkingEnabled(claudeBody)
_, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available
if budgetTokens <= 0 {
// Calculate budgetTokens based on max_tokens if available
// Use 50% of max_tokens for thinking, with min 8000 and max 24000
if maxTokens > 0 {
budgetTokens = maxTokens / 2
if budgetTokens < 8000 {
budgetTokens = 8000
}
if budgetTokens > 24000 {
budgetTokens = 24000
}
log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
} else {
budgetTokens = 16000 // Default budget tokens
}
}
// Check for thinking mode using the comprehensive IsThinkingEnabledWithHeaders function
// This supports Claude API format, OpenAI reasoning_effort, AMP/Cursor format, and Anthropic-Beta header
thinkingEnabled := IsThinkingEnabledWithHeaders(claudeBody, headers)

// Inject timestamp context
timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
Expand Down Expand Up @@ -231,19 +218,26 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
log.Debugf("kiro: injected tool_choice hint into system prompt")
}

// Inject thinking hint when thinking mode is enabled
// Convert Claude tools to Kiro format
kiroTools := convertClaudeToolsToKiro(tools)

// Thinking mode implementation:
// Kiro API doesn't accept max_tokens for thinking. Instead, thinking mode is enabled
// by injecting <thinking_mode> and <max_thinking_length> tags into the system prompt.
// We use a fixed max_thinking_length value since Kiro handles the actual budget internally.
if thinkingEnabled {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The PR description mentions preventing duplicate injection of thinking tags, and the function hasThinkingTagInBody was added for this purpose. However, this check is not being used within BuildKiroPayload. As a result, the thinking hint will be injected even if the request body already contains <thinking_mode> or <max_thinking_length> tags.

Suggested change
if thinkingEnabled {
if thinkingEnabled && !hasThinkingTagInBody(claudeBody) {

thinkingHint := `<thinking_mode>interleaved</thinking_mode>
<max_thinking_length>200000</max_thinking_length>

IMPORTANT: You MUST use <thinking>...</thinking> tags to show your reasoning process before providing your final response. Think step by step inside the thinking tags.`
if systemPrompt != "" {
systemPrompt += "\n"
systemPrompt = thinkingHint + "\n\n" + systemPrompt
} else {
systemPrompt = thinkingHint
}
dynamicThinkingHint := fmt.Sprintf("<thinking_mode>interleaved</thinking_mode><max_thinking_length>%d</max_thinking_length>", budgetTokens)
systemPrompt += dynamicThinkingHint
log.Debugf("kiro: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
log.Infof("kiro: injected thinking prompt, has_tools: %v", len(kiroTools) > 0)
}

// Convert Claude tools to Kiro format
kiroTools := convertClaudeToolsToKiro(tools)

// Process messages and build history
history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin)

Expand Down Expand Up @@ -280,6 +274,7 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
}

// Build inferenceConfig if we have any inference parameters
// Note: Kiro API doesn't actually use max_tokens for thinking budget
var inferenceConfig *KiroInferenceConfig
if maxTokens > 0 || hasTemperature || hasTopP {
inferenceConfig = &KiroInferenceConfig{}
Expand Down Expand Up @@ -350,7 +345,7 @@ func extractSystemPrompt(claudeBody []byte) string {
// checkThinkingMode checks if thinking mode is enabled in the Claude request
func checkThinkingMode(claudeBody []byte) (bool, int64) {
thinkingEnabled := false
var budgetTokens int64 = 16000
var budgetTokens int64 = 24000

thinkingField := gjson.GetBytes(claudeBody, "thinking")
if thinkingField.Exists() {
Expand All @@ -373,6 +368,32 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
return thinkingEnabled, budgetTokens
}

// hasThinkingTagInBody checks if the request body already contains thinking configuration tags.
// This is used to prevent duplicate injection when client (e.g., AMP/Cursor) already includes thinking config.
func hasThinkingTagInBody(body []byte) bool {
bodyStr := string(body)
return strings.Contains(bodyStr, "<thinking_mode>") || strings.Contains(bodyStr, "<max_thinking_length>")
}


// IsThinkingEnabledFromHeader checks if thinking mode is enabled via Anthropic-Beta header.
// Claude CLI uses "Anthropic-Beta: interleaved-thinking-2025-05-14" to enable thinking.
func IsThinkingEnabledFromHeader(headers http.Header) bool {
if headers == nil {
return false
}
betaHeader := headers.Get("Anthropic-Beta")
if betaHeader == "" {
return false
}
// Check for interleaved-thinking beta feature
if strings.Contains(betaHeader, "interleaved-thinking") {
log.Debugf("kiro: thinking mode enabled via Anthropic-Beta header: %s", betaHeader)
return true
}
return false
}

// IsThinkingEnabled is a public wrapper to check if thinking mode is enabled.
// This is used by the executor to determine whether to parse <thinking> tags in responses.
// When thinking is NOT enabled in the request, <thinking> tags in responses should be
Expand All @@ -383,6 +404,21 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
// - OpenAI format: reasoning_effort parameter
// - AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
func IsThinkingEnabled(body []byte) bool {
return IsThinkingEnabledWithHeaders(body, nil)
}

// IsThinkingEnabledWithHeaders checks if thinking mode is enabled from body or headers.
// This is the comprehensive check that supports all thinking detection methods:
// - Claude API format: thinking.type = "enabled"
// - OpenAI format: reasoning_effort parameter
// - AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
// - Anthropic-Beta header: interleaved-thinking-2025-05-14
func IsThinkingEnabledWithHeaders(body []byte, headers http.Header) bool {
// Check Anthropic-Beta header first (Claude Code uses this)
if IsThinkingEnabledFromHeader(headers) {
return true
}

// Check Claude API format first (thinking.type = "enabled")
enabled, _ := checkThinkingMode(body)
if enabled {
Expand Down Expand Up @@ -771,4 +807,4 @@ func BuildAssistantMessageStruct(msg gjson.Result) KiroAssistantResponseMessage
Content: contentBuilder.String(),
ToolUses: toolUses,
}
}
}
28 changes: 24 additions & 4 deletions internal/translator/kiro/claude/kiro_claude_response.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
package claude

import (
"crypto/sha256"
"encoding/base64"
"encoding/json"
"strings"

Expand All @@ -14,6 +16,18 @@ import (
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
)

// generateThinkingSignature generates a signature for thinking content.
// This is required by Claude API for thinking blocks in non-streaming responses.
// The signature is a base64-encoded hash of the thinking content.
func generateThinkingSignature(thinkingContent string) string {
if thinkingContent == "" {
return ""
}
// Generate a deterministic signature based on content hash
hash := sha256.Sum256([]byte(thinkingContent))
return base64.StdEncoding.EncodeToString(hash[:])
}

// Local references to kirocommon constants for thinking block parsing
var (
thinkingStartTag = kirocommon.ThinkingStartTag
Expand Down Expand Up @@ -149,9 +163,12 @@ func ExtractThinkingFromContent(content string) []map[string]interface{} {
if endIdx == -1 {
// No closing tag found, treat rest as thinking content (incomplete response)
if strings.TrimSpace(remaining) != "" {
// Generate signature for thinking content (required by Claude API)
signature := generateThinkingSignature(remaining)
blocks = append(blocks, map[string]interface{}{
"type": "thinking",
"thinking": remaining,
"type": "thinking",
"thinking": remaining,
"signature": signature,
})
log.Warnf("kiro: extractThinkingFromContent - missing closing </thinking> tag")
}
Expand All @@ -161,9 +178,12 @@ func ExtractThinkingFromContent(content string) []map[string]interface{} {
// Extract thinking content between tags
thinkContent := remaining[:endIdx]
if strings.TrimSpace(thinkContent) != "" {
// Generate signature for thinking content (required by Claude API)
signature := generateThinkingSignature(thinkContent)
blocks = append(blocks, map[string]interface{}{
"type": "thinking",
"thinking": thinkContent,
"type": "thinking",
"thinking": thinkContent,
"signature": signature,
})
log.Debugf("kiro: extractThinkingFromContent - extracted thinking block (len: %d)", len(thinkContent))
}
Expand Down
Loading
Loading