From a89514951f5676feed3594b1f806920e32507ebf Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 10 Dec 2025 22:19:55 +0800 Subject: [PATCH 01/15] fix(claude): prevent final events when no content streamed --- .../claude/antigravity_claude_response.go | 23 ++++++-- .../claude/gemini-cli_claude_response.go | 57 ++++++++++++------- .../gemini/claude/gemini_claude_response.go | 45 +++++++++------ 3 files changed, 83 insertions(+), 42 deletions(-) diff --git a/internal/translator/antigravity/claude/antigravity_claude_response.go b/internal/translator/antigravity/claude/antigravity_claude_response.go index 42265e80e..28785a8fb 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_response.go +++ b/internal/translator/antigravity/claude/antigravity_claude_response.go @@ -35,6 +35,7 @@ type Params struct { TotalTokenCount int64 // Cached total token count from usage metadata HasSentFinalEvents bool // Indicates if final content/message events have been sent HasToolUse bool // Indicates if tool use was observed in the stream + HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output } // toolUseIDCounter provides a process-wide unique counter for tool use identifiers. @@ -69,11 +70,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq if bytes.Equal(rawJSON, []byte("[DONE]")) { output := "" - appendFinalEvents(params, &output, true) - - return []string{ - output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n", + // Only send final events if we have actually output content + if params.HasContent { + appendFinalEvents(params, &output, true) + return []string{ + output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n", + } } + return []string{} } output := "" @@ -119,10 +123,12 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq output = output + "event: content_block_delta\n" data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) + params.HasContent = true } else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state output = output + "event: content_block_delta\n" data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) + params.HasContent = true } else { // Transition from another state to thinking // First, close any existing content block @@ -146,6 +152,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) params.ResponseType = 2 // Set state to thinking + params.HasContent = true } } else { finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason") @@ -156,6 +163,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq output = output + "event: content_block_delta\n" data, _ := 
sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) + params.HasContent = true } else { // Transition from another state to text content // First, close any existing content block @@ -179,6 +187,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) params.ResponseType = 1 // Set state to content + params.HasContent = true } } } @@ -230,6 +239,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq output = output + fmt.Sprintf("data: %s\n\n\n", data) } params.ResponseType = 3 + params.HasContent = true } } } @@ -269,6 +279,11 @@ func appendFinalEvents(params *Params, output *string, force bool) { return } + // Only send final events if we have actually output content + if !params.HasContent { + return + } + if params.ResponseType != 0 { *output = *output + "event: content_block_stop\n" *output = *output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, params.ResponseIndex) diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go index 9b37c52b6..ca905f9e3 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go @@ -26,6 +26,7 @@ type Params struct { HasFirstResponse bool // Indicates if the initial message_start event has been sent ResponseType int // Current response type: 0=none, 1=content, 2=thinking, 3=function ResponseIndex int // Index counter for content blocks in the streaming response + HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output } // toolUseIDCounter provides a process-wide unique counter for tool use identifiers. 
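The same HasContent gate is threaded through all three translators touched by this patch. A minimal sketch of the rule, assuming a Params receiver shaped like the struct above (an illustration, not code from the repo):

func finalEvents(p *Params) []string {
	if !p.HasContent {
		// Nothing was ever streamed; emitting message_stop here would
		// produce an empty, malformed Claude message.
		return nil
	}
	return []string{"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n"}
}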
@@ -57,9 +58,13 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque } if bytes.Equal(rawJSON, []byte("[DONE]")) { - return []string{ - "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n", + // Only send message_stop if we have actually output content + if (*param).(*Params).HasContent { + return []string{ + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n", + } } + return []string{} } // Track whether tools are being used in this response chunk @@ -108,6 +113,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque output = output + "event: content_block_delta\n" data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) + (*param).(*Params).HasContent = true } else { // Transition from another state to thinking // First, close any existing content block @@ -131,6 +137,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) (*param).(*Params).ResponseType = 2 // Set state to thinking + (*param).(*Params).HasContent = true } } else { // Process regular text content (user-visible output) @@ -139,6 +146,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque output = output + "event: content_block_delta\n" data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) + (*param).(*Params).HasContent = true } else { // Transition from another state to text content // First, close any existing content block @@ -162,6 +170,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) (*param).(*Params).ResponseType = 1 // Set state to content + (*param).(*Params).HasContent = true } } } else if functionCallResult.Exists() { @@ -211,6 +220,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque output = output + fmt.Sprintf("data: %s\n\n\n", data) } (*param).(*Params).ResponseType = 3 + (*param).(*Params).HasContent = true } } } @@ -219,28 +229,31 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque // Process usage metadata and finish reason when present in the response if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) { if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() { - // Close the final content block - output = output + "event: content_block_stop\n" - output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex) - output = output + "\n\n\n" - - // Send the final message delta with usage information and stop reason - output = output + "event: message_delta\n" 
- output = output + `data: ` - - // Create the message delta template with appropriate stop reason - template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` - // Set tool_use stop reason if tools were used in this response - if usedTool { - template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` - } + // Only send final events if we have actually output content + if (*param).(*Params).HasContent { + // Close the final content block + output = output + "event: content_block_stop\n" + output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex) + output = output + "\n\n\n" + + // Send the final message delta with usage information and stop reason + output = output + "event: message_delta\n" + output = output + `data: ` + + // Create the message delta template with appropriate stop reason + template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` + // Set tool_use stop reason if tools were used in this response + if usedTool { + template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` + } - // Include thinking tokens in output token count if present - thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int() - template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount) - template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int()) + // Include thinking tokens in output token count if present + thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int() + template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount) + template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int()) - output = output + template + "\n\n\n" + output = output + template + "\n\n\n" + } } } diff --git a/internal/translator/gemini/claude/gemini_claude_response.go b/internal/translator/gemini/claude/gemini_claude_response.go index 8fd566df3..7767c3652 100644 --- a/internal/translator/gemini/claude/gemini_claude_response.go +++ b/internal/translator/gemini/claude/gemini_claude_response.go @@ -25,6 +25,7 @@ type Params struct { HasFirstResponse bool ResponseType int ResponseIndex int + HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output } // toolUseIDCounter provides a process-wide unique counter for tool use identifiers. 
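For context, a well-formed Claude SSE stream closes with content_block_stop, then message_delta (carrying usage and stop_reason), then message_stop. Before this patch the translators emitted that tail unconditionally on "[DONE]" or a finishReason, even when no content block had been opened; gating the whole tail on HasContent keeps the sequence intact. The expected tail, using the same literals the code above builds:

event: content_block_stop
data: {"type":"content_block_stop","index":0}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}

event: message_stop
data: {"type":"message_stop"}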
@@ -57,9 +58,13 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR } if bytes.Equal(rawJSON, []byte("[DONE]")) { - return []string{ - "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n", + // Only send message_stop if we have actually output content + if (*param).(*Params).HasContent { + return []string{ + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n", + } } + return []string{} } // Track whether tools are being used in this response chunk @@ -108,6 +113,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR output = output + "event: content_block_delta\n" data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) + (*param).(*Params).HasContent = true } else { // Transition from another state to thinking // First, close any existing content block @@ -131,6 +137,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) (*param).(*Params).ResponseType = 2 // Set state to thinking + (*param).(*Params).HasContent = true } } else { // Process regular text content (user-visible output) @@ -139,6 +146,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR output = output + "event: content_block_delta\n" data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) + (*param).(*Params).HasContent = true } else { // Transition from another state to text content // First, close any existing content block @@ -162,6 +170,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String()) output = output + fmt.Sprintf("data: %s\n\n\n", data) (*param).(*Params).ResponseType = 1 // Set state to content + (*param).(*Params).HasContent = true } } } else if functionCallResult.Exists() { @@ -211,6 +220,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR output = output + fmt.Sprintf("data: %s\n\n\n", data) } (*param).(*Params).ResponseType = 3 + (*param).(*Params).HasContent = true } } } @@ -218,23 +228,26 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR usageResult := gjson.GetBytes(rawJSON, "usageMetadata") if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) { if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() { - output = output + "event: content_block_stop\n" - output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex) - output = output + "\n\n\n" + // Only send final events if we have actually output content + if (*param).(*Params).HasContent { + output = output + "event: content_block_stop\n" + output = output + 
fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex) + output = output + "\n\n\n" + + output = output + "event: message_delta\n" + output = output + `data: ` + + template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` + if usedTool { + template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` + } - output = output + "event: message_delta\n" - output = output + `data: ` + thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int() + template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount) + template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int()) - template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` - if usedTool { - template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` + output = output + template + "\n\n\n" } - - thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int() - template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount) - template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int()) - - output = output + template + "\n\n\n" } } From 76c563d161d7687519edfca710d71a01f00caee6 Mon Sep 17 00:00:00 2001 From: sususu Date: Wed, 10 Dec 2025 23:20:04 +0800 Subject: [PATCH 02/15] fix(executor): increase buffer size for stream scanners to 50MB across multiple executors --- internal/runtime/executor/antigravity_executor.go | 2 +- internal/runtime/executor/claude_executor.go | 4 ++-- internal/runtime/executor/codex_executor.go | 2 +- internal/runtime/executor/gemini_cli_executor.go | 2 +- internal/runtime/executor/gemini_executor.go | 2 +- internal/runtime/executor/gemini_vertex_executor.go | 4 ++-- internal/runtime/executor/iflow_executor.go | 2 +- internal/runtime/executor/openai_compat_executor.go | 2 +- internal/runtime/executor/qwen_executor.go | 2 +- internal/translator/claude/gemini/claude_gemini_response.go | 4 ++-- .../openai/responses/claude_openai-responses_response.go | 4 ++-- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index a32e66ec0..f5b5ef067 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -38,7 +38,7 @@ const ( defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64" antigravityAuthType = "antigravity" refreshSkew = 3000 * time.Second - streamScannerBuffer int = 20_971_520 + streamScannerBuffer int = 52_428_800 // 50MB ) var randSource = rand.New(rand.NewSource(time.Now().UnixNano())) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 1a18c46a5..37e6e6c86 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -238,7 +238,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // If from == to (Claude → Claude), directly forward the SSE stream without translation if from == to { scanner := bufio.NewScanner(decodedBody) - scanner.Buffer(nil, 20_971_520) + 
scanner.Buffer(nil, 52_428_800) // 50MB for scanner.Scan() { line := scanner.Bytes() appendAPIResponseChunk(ctx, e.cfg, line) @@ -261,7 +261,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // For other formats, use translation scanner := bufio.NewScanner(decodedBody) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 1c4291f64..e9cc8b769 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -205,7 +205,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } }() scanner := bufio.NewScanner(httpResp.Body) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index a2e0ececd..2c4f3f881 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -309,7 +309,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut }() if opts.Alt == "" { scanner := bufio.NewScanner(resp.Body) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 8879a4f19..0c52b2030 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -243,7 +243,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } }() scanner := bufio.NewScanner(httpResp.Body) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index c7d10a670..0759d10ac 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -564,7 +564,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte } }() scanner := bufio.NewScanner(httpResp.Body) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() @@ -678,7 +678,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth } }() scanner := bufio.NewScanner(httpResp.Body) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 3589e9226..ee30521a0 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -201,7 +201,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au }() scanner := bufio.NewScanner(httpResp.Body) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 55ec6dc9f..bb27b58c4 
100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -206,7 +206,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } }() scanner := bufio.NewScanner(httpResp.Body) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index 0c3e6b56f..63c54bee3 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -181,7 +181,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut } }() scanner := bufio.NewScanner(httpResp.Body) - scanner.Buffer(nil, 20_971_520) + scanner.Buffer(nil, 52_428_800) // 50MB var param any for scanner.Scan() { line := scanner.Bytes() diff --git a/internal/translator/claude/gemini/claude_gemini_response.go b/internal/translator/claude/gemini/claude_gemini_response.go index 0c90398ec..c77cca4bc 100644 --- a/internal/translator/claude/gemini/claude_gemini_response.go +++ b/internal/translator/claude/gemini/claude_gemini_response.go @@ -331,8 +331,8 @@ func ConvertClaudeResponseToGeminiNonStream(_ context.Context, modelName string, streamingEvents := make([][]byte, 0) scanner := bufio.NewScanner(bytes.NewReader(rawJSON)) - buffer := make([]byte, 20_971_520) - scanner.Buffer(buffer, 20_971_520) + buffer := make([]byte, 52_428_800) // 50MB + scanner.Buffer(buffer, 52_428_800) for scanner.Scan() { line := scanner.Bytes() // log.Debug(string(line)) diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_response.go b/internal/translator/claude/openai/responses/claude_openai-responses_response.go index 77507f97b..252967f14 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_response.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_response.go @@ -445,8 +445,8 @@ func ConvertClaudeResponseToOpenAIResponsesNonStream(_ context.Context, _ string // Use a simple scanner to iterate through raw bytes // Note: extremely large responses may require increasing the buffer scanner := bufio.NewScanner(bytes.NewReader(rawJSON)) - buf := make([]byte, 20_971_520) - scanner.Buffer(buf, 20_971_520) + buf := make([]byte, 52_428_800) // 50MB + scanner.Buffer(buf, 52_428_800) for scanner.Scan() { line := scanner.Bytes() if !bytes.HasPrefix(line, dataTag) { From a03d514095c4f76d7d5bf986bd1e109854e2868f Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 11:28:44 +0800 Subject: [PATCH 03/15] feat(registry): add thinking metadata for models --- internal/registry/model_definitions.go | 13 +++++++++++++ internal/registry/model_registry.go | 3 +++ 2 files changed, 16 insertions(+) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 2f87f195b..9956d964f 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.5 Haiku", ContextLength: 200000, MaxCompletionTokens: 64000, + // Thinking: not supported for Haiku models }, { ID: "claude-sonnet-4-5-20250929", @@ -49,6 +50,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.1 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, 
ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-opus-4-20250514", @@ -59,6 +61,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-sonnet-4-20250514", @@ -69,6 +72,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-3-7-sonnet-20250219", @@ -79,6 +83,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 3.7 Sonnet", ContextLength: 128000, MaxCompletionTokens: 8192, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-3-5-haiku-20241022", @@ -89,6 +94,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 3.5 Haiku", ContextLength: 128000, MaxCompletionTokens: 8192, + // Thinking: not supported for Haiku models }, } } @@ -476,6 +482,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}}, }, { ID: "gpt-5-codex", @@ -489,6 +496,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5-codex-mini", @@ -502,6 +510,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1", @@ -515,6 +524,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}}, }, { ID: "gpt-5.1-codex", @@ -528,6 +538,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1-codex-mini", @@ -541,6 +552,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1-codex-max", @@ -554,6 +566,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}, }, } } diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index 5ef9007f9..f3517bde8 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -63,6 +63,9 @@ type ThinkingSupport struct { ZeroAllowed bool `json:"zero_allowed,omitempty"` // DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget). DynamicAllowed bool `json:"dynamic_allowed,omitempty"` + // Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high"). + // When set, the model uses level-based reasoning instead of token budgets. 
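+ // An empty Levels slice means the model takes numeric thinking budgets (Min/Max) instead.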
+ Levels []string `json:"levels,omitempty"` } // ModelRegistration tracks a model's availability From 3ffd120ae9e9ce2bf34cc87c9994150ec4474ff6 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 11:51:33 +0800 Subject: [PATCH 04/15] feat(runtime): add thinking config normalization --- internal/runtime/executor/codex_executor.go | 2 + .../executor/openai_compat_executor.go | 8 ++- internal/runtime/executor/payload_helpers.go | 57 +++++++++++++++++++ internal/util/thinking.go | 46 +++++++++++++++ 4 files changed, 111 insertions(+), 2 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 46a301773..3fe5ed6e0 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -55,6 +55,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = normalizeThinkingConfig(body, upstreamModel) body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.SetBytes(body, "stream", true) @@ -149,6 +150,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = normalizeThinkingConfig(body, upstreamModel) body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "model", upstreamModel) diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 93122c202..ba47750e5 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -59,9 +59,11 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } + translated = normalizeThinkingConfig(translated, upstreamModel) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -148,9 +150,11 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } + translated = normalizeThinkingConfig(translated, upstreamModel) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err 
:= http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 37e3141a3..9d431f114 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -232,3 +232,60 @@ func matchModelPattern(pattern, model string) bool { } return pi == len(pattern) } + +// normalizeThinkingConfig normalizes thinking-related fields in the payload +// based on model capabilities. For models without thinking support, it strips +// reasoning fields. For models with level-based thinking, it validates and +// normalizes the reasoning effort level. +func normalizeThinkingConfig(payload []byte, model string) []byte { + if len(payload) == 0 || model == "" { + return payload + } + + if !util.ModelSupportsThinking(model) { + return stripThinkingFields(payload) + } + + if util.ModelUsesThinkingLevels(model) { + return normalizeReasoningEffortLevel(payload, model) + } + + return payload +} + +// stripThinkingFields removes thinking-related fields from the payload for +// models that do not support thinking. +func stripThinkingFields(payload []byte) []byte { + fieldsToRemove := []string{ + "reasoning", + "reasoning_effort", + "reasoning.effort", + } + out := payload + for _, field := range fieldsToRemove { + if gjson.GetBytes(out, field).Exists() { + out, _ = sjson.DeleteBytes(out, field) + } + } + return out +} + +// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort +// or reasoning.effort field for level-based thinking models. +func normalizeReasoningEffortLevel(payload []byte, model string) []byte { + out := payload + + if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { + if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { + out, _ = sjson.SetBytes(out, "reasoning_effort", normalized) + } + } + + if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() { + if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { + out, _ = sjson.SetBytes(out, "reasoning.effort", normalized) + } + } + + return out +} diff --git a/internal/util/thinking.go b/internal/util/thinking.go index c16b91cd1..37200980c 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -1,6 +1,8 @@ package util import ( + "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" ) @@ -67,3 +69,47 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero } return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed } + +// GetModelThinkingLevels returns the discrete reasoning effort levels for the model. +// Returns nil if the model has no thinking support or no levels defined. +func GetModelThinkingLevels(model string) []string { + if model == "" { + return nil + } + info := registry.GetGlobalRegistry().GetModelInfo(model) + if info == nil || info.Thinking == nil { + return nil + } + return info.Thinking.Levels +} + +// ModelUsesThinkingLevels reports whether the model uses discrete reasoning +// effort levels instead of numeric budgets. +func ModelUsesThinkingLevels(model string) bool { + levels := GetModelThinkingLevels(model) + return len(levels) > 0 +} + +// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort +// level for the given model. 
If the level is not supported, it returns the +// first (lowest) level from the model's supported levels. +func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { + levels := GetModelThinkingLevels(model) + if len(levels) == 0 { + return "", false + } + loweredEffort := strings.ToLower(strings.TrimSpace(effort)) + for _, lvl := range levels { + if strings.ToLower(lvl) == loweredEffort { + return lvl, true + } + } + return defaultReasoningLevel(levels), true +} + +func defaultReasoningLevel(levels []string) string { + if len(levels) > 0 { + return levels[0] + } + return "" +} From d06d0eab2f12af290453c17d8cb24e595792751a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:14:51 +0800 Subject: [PATCH 05/15] fix(util): centralize reasoning effort normalization --- internal/util/thinking.go | 42 ++++++++++++++++++++++++++++++++ internal/util/thinking_suffix.go | 26 ++++++++------------ 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 37200980c..bcf92c5b3 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -113,3 +113,45 @@ func defaultReasoningLevel(levels []string) string { } return "" } + +// standardReasoningEfforts defines the canonical set of reasoning effort levels. +// This serves as the single source of truth for valid effort values. +var standardReasoningEfforts = []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"} + +// IsValidReasoningEffort checks if the given effort string is a valid reasoning effort level. +// This is a registry-independent check against the standard effort levels. +func IsValidReasoningEffort(effort string) bool { + if effort == "" { + return false + } + lowered := strings.ToLower(strings.TrimSpace(effort)) + for _, e := range standardReasoningEfforts { + if e == lowered { + return true + } + } + return false +} + +// NormalizeReasoningEffort normalizes a reasoning effort string to its canonical form. +// It first tries registry-based normalization if a model is provided, then falls back +// to the standard effort levels. Returns empty string and false if invalid. 
+func NormalizeReasoningEffort(model, effort string) (string, bool) { + if effort == "" { + return "", false + } + lowered := strings.ToLower(strings.TrimSpace(effort)) + + if model != "" { + if normalized, ok := NormalizeReasoningEffortLevel(model, effort); ok { + return normalized, true + } + } + + for _, e := range standardReasoningEfforts { + if e == lowered { + return e, true + } + } + return "", false +} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index e3fd91366..1a1a8715f 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -58,8 +58,9 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { baseModel = modelName[:idx] budgetOverride = &parsed matched = true - } else if effort, okEffort := normalizeReasoningEffort(value); okEffort { + } else if IsValidReasoningEffort(value) { baseModel = modelName[:idx] + effort := strings.ToLower(strings.TrimSpace(value)) reasoningEffort = &effort matched = true } @@ -185,7 +186,9 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { return "", false } if effort != nil && *effort != "" { - return *effort, true + if IsValidReasoningEffort(*effort) { + return strings.ToLower(strings.TrimSpace(*effort)), true + } } if budget != nil { switch *budget { @@ -207,7 +210,11 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) { if effort == "" { return 0, false } - switch strings.ToLower(effort) { + normalized, ok := NormalizeReasoningEffort(model, effort) + if !ok { + return 0, false + } + switch normalized { case "none": return 0, true case "auto": @@ -312,16 +319,3 @@ func parseNumberToInt(raw any) (int, bool) { } return 0, false } - -func normalizeReasoningEffort(value string) (string, bool) { - if value == "" { - return "", false - } - effort := strings.ToLower(strings.TrimSpace(value)) - switch effort { - case "minimal", "low", "medium", "high", "xhigh", "auto", "none": - return effort, true - default: - return "", false - } -} From 169f4295d041b0c2e1089d02073740c36f83e8bf Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:20:12 +0800 Subject: [PATCH 06/15] fix(util): align reasoning effort handling with registry --- internal/util/thinking.go | 42 -------------------------------- internal/util/thinking_suffix.go | 19 ++++++++------- 2 files changed, 10 insertions(+), 51 deletions(-) diff --git a/internal/util/thinking.go b/internal/util/thinking.go index bcf92c5b3..37200980c 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -113,45 +113,3 @@ func defaultReasoningLevel(levels []string) string { } return "" } - -// standardReasoningEfforts defines the canonical set of reasoning effort levels. -// This serves as the single source of truth for valid effort values. -var standardReasoningEfforts = []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"} - -// IsValidReasoningEffort checks if the given effort string is a valid reasoning effort level. -// This is a registry-independent check against the standard effort levels. -func IsValidReasoningEffort(effort string) bool { - if effort == "" { - return false - } - lowered := strings.ToLower(strings.TrimSpace(effort)) - for _, e := range standardReasoningEfforts { - if e == lowered { - return true - } - } - return false -} - -// NormalizeReasoningEffort normalizes a reasoning effort string to its canonical form. 
-// It first tries registry-based normalization if a model is provided, then falls back -// to the standard effort levels. Returns empty string and false if invalid. -func NormalizeReasoningEffort(model, effort string) (string, bool) { - if effort == "" { - return "", false - } - lowered := strings.ToLower(strings.TrimSpace(effort)) - - if model != "" { - if normalized, ok := NormalizeReasoningEffortLevel(model, effort); ok { - return normalized, true - } - } - - for _, e := range standardReasoningEfforts { - if e == lowered { - return e, true - } - } - return "", false -} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index 1a1a8715f..c2d806ad9 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -58,11 +58,14 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { baseModel = modelName[:idx] budgetOverride = &parsed matched = true - } else if IsValidReasoningEffort(value) { + } else { baseModel = modelName[:idx] - effort := strings.ToLower(strings.TrimSpace(value)) - reasoningEffort = &effort - matched = true + if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok { + reasoningEffort = &normalized + matched = true + } else { + baseModel = modelName + } } } } else if strings.HasSuffix(lower, "-thinking") { @@ -186,9 +189,7 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { return "", false } if effort != nil && *effort != "" { - if IsValidReasoningEffort(*effort) { - return strings.ToLower(strings.TrimSpace(*effort)), true - } + return strings.ToLower(strings.TrimSpace(*effort)), true } if budget != nil { switch *budget { @@ -210,9 +211,9 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) { if effort == "" { return 0, false } - normalized, ok := NormalizeReasoningEffort(model, effort) + normalized, ok := NormalizeReasoningEffortLevel(model, effort) if !ok { - return 0, false + normalized = strings.ToLower(strings.TrimSpace(effort)) } switch normalized { case "none": From 519da2e04222641a412fb5c17a0bc2cf20428800 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:36:54 +0800 Subject: [PATCH 07/15] fix(runtime): validate reasoning effort levels --- internal/runtime/executor/codex_executor.go | 6 ++++ .../executor/openai_compat_executor.go | 6 ++++ internal/runtime/executor/payload_helpers.go | 35 +++++++++++++++++++ internal/util/thinking.go | 12 ++----- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 3fe5ed6e0..7003373f7 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -56,6 +56,9 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.SetBytes(body, "stream", true) @@ -151,6 +154,9 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) body = 
normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "model", upstreamModel) diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index ba47750e5..507b0fd90 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -64,6 +64,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } translated = normalizeThinkingConfig(translated, upstreamModel) + if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { + return resp, errValidate + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -155,6 +158,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } translated = normalizeThinkingConfig(translated, upstreamModel) + if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { + return nil, errValidate + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 9d431f114..5711bbbdd 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -1,6 +1,8 @@ package executor import ( + "fmt" + "net/http" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" @@ -289,3 +291,36 @@ func normalizeReasoningEffortLevel(payload []byte, model string) []byte { return out } + +// validateThinkingConfig checks for unsupported reasoning levels on level-based models. +// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently +// downgrading requests. +func validateThinkingConfig(payload []byte, model string) error { + if len(payload) == 0 || model == "" { + return nil + } + if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) { + return nil + } + + levels := util.GetModelThinkingLevels(model) + checkField := func(path string) error { + if effort := gjson.GetBytes(payload, path); effort.Exists() { + if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok { + return statusErr{ + code: http.StatusBadRequest, + msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")), + } + } + } + return nil + } + + if err := checkField("reasoning_effort"); err != nil { + return err + } + if err := checkField("reasoning.effort"); err != nil { + return err + } + return nil +} diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 37200980c..9671f20b5 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -91,8 +91,7 @@ func ModelUsesThinkingLevels(model string) bool { } // NormalizeReasoningEffortLevel validates and normalizes a reasoning effort -// level for the given model. 
If the level is not supported, it returns the -// first (lowest) level from the model's supported levels. +// level for the given model. Returns false when the level is not supported. func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { levels := GetModelThinkingLevels(model) if len(levels) == 0 { @@ -104,12 +103,5 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { return lvl, true } } - return defaultReasoningLevel(levels), true -} - -func defaultReasoningLevel(levels []string) string { - if len(levels) > 0 { - return levels[0] - } - return "" + return "", false } From 3a81ab22fdb6c9b993fac1deef94785f8a8f5dbf Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 14:35:05 +0800 Subject: [PATCH 08/15] fix(runtime): unify reasoning effort metadata overrides --- internal/runtime/executor/codex_executor.go | 6 ++-- internal/runtime/executor/iflow_executor.go | 4 +-- .../executor/openai_compat_executor.go | 4 +-- internal/runtime/executor/payload_helpers.go | 30 +++------------- internal/runtime/executor/qwen_executor.go | 4 +-- internal/util/thinking_suffix.go | 34 ++++++++++++++++--- 6 files changed, 44 insertions(+), 38 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 7003373f7..b9470b3c3 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -54,7 +54,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body = normalizeThinkingConfig(body, upstreamModel) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate @@ -152,7 +152,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body = normalizeThinkingConfig(body, upstreamModel) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate @@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth modelForCounting := req.Model - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index c68a64315..a445e47da 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -57,7 +57,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), 
false) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -143,7 +143,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 507b0fd90..68b2963a6 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -58,7 +58,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) @@ -152,7 +152,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 5711bbbdd..61486d62c 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -45,40 +45,20 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) } -// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata. -// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking. -func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte { +// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. +// Metadata values take precedence over any existing field when the model supports thinking. 
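+// The Codex executor passes "reasoning.effort"; chat-completions executors pass "reasoning_effort".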
+func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte { if len(metadata) == 0 { return payload } if !util.ModelSupportsThinking(model) { return payload } - if gjson.GetBytes(payload, "reasoning.effort").Exists() { + if field == "" { return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil { - return updated - } - } - return payload -} - -// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field) -// when present in metadata. It avoids overwriting an existing reasoning_effort field. -func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte { - if len(metadata) == 0 { - return payload - } - if !util.ModelSupportsThinking(model) { - return payload - } - if gjson.GetBytes(payload, "reasoning_effort").Exists() { - return payload - } - if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } } diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index f060cb61e..d25ed5da7 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -51,7 +51,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -126,7 +126,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index c2d806ad9..47ce42f75 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -55,16 +55,42 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { value := modelName[idx+len("-thinking-"):] if value != "" { if parsed, ok := parseIntPrefix(value); ok { - baseModel = modelName[:idx] - budgetOverride = &parsed - matched = true + candidateBase := modelName[:idx] + if ModelUsesThinkingLevels(candidateBase) { + baseModel = candidateBase + // Numeric suffix on level-aware models should still surface as reasoning effort metadata. 
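+ // e.g. "gpt-5.1-thinking-128" surfaces "128" as the effort string rather than a token budget.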
+ raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + } + matched = true + } else { + baseModel = candidateBase + budgetOverride = &parsed + matched = true + } } else { baseModel = modelName[:idx] if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok { reasoningEffort = &normalized matched = true + } else if !ModelUsesThinkingLevels(baseModel) { + // Keep unknown effort tokens so callers can honor user intent even without normalization. + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + matched = true + } else { + baseModel = modelName + } } else { - baseModel = modelName + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + matched = true + } else { + baseModel = modelName + } } } } From 007572b58e2e6577f3c9a9a83d946e3b9c757437 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 15:52:14 +0800 Subject: [PATCH 09/15] fix(util): do not strip thinking suffix on registered models NormalizeThinkingModel now checks ModelSupportsThinking before removing "-thinking" or "-thinking-", avoiding accidental parsing of model names where the suffix is part of the official id (e.g., kimi-k2-thinking, qwen3-235b-a22b-thinking-2507). The registry adds ThinkingSupport metadata for several models and propagates it via ModelInfo (e.g., kimi-k2-thinking, deepseek-r1, qwen3-235b-a22b-thinking-2507, minimax-m2), enabling accurate detection of thinking-capable models and correcting base model inference. --- internal/registry/model_definitions.go | 10 ++++++---- internal/util/thinking_suffix.go | 19 +++++++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 9956d964f..adaff867c 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -623,6 +623,7 @@ func GetIFlowModels() []*ModelInfo { DisplayName string Description string Created int64 + Thinking *ThinkingSupport }{ {ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600}, {ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800}, @@ -632,17 +633,17 @@ func GetIFlowModels() []*ModelInfo { {ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400}, {ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400}, {ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000}, - {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200}, + {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000}, {ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000}, {ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200}, - {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", 
Created: 1737331200}, + {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200}, {ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400}, - {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600}, + {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600}, {ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600}, - {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000}, + {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, } models := make([]*ModelInfo, 0, len(entries)) for _, entry := range entries { @@ -654,6 +655,7 @@ func GetIFlowModels() []*ModelInfo { Type: "iflow", DisplayName: entry.DisplayName, Description: entry.Description, + Thinking: entry.Thinking, }) } return models diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index 47ce42f75..ef8302b0e 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -52,6 +52,11 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { matched = true default: if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 { + // Skip stripping if the original model is a registered thinking model. + // This prevents "-thinking-2507" in "qwen3-235b-a22b-thinking-2507" from being parsed. + if ModelSupportsThinking(modelName) { + break + } value := modelName[idx+len("-thinking-"):] if value != "" { if parsed, ok := parseIntPrefix(value); ok { @@ -95,10 +100,16 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { } } } else if strings.HasSuffix(lower, "-thinking") { - baseModel = modelName[:len(modelName)-len("-thinking")] - effort := "medium" - reasoningEffort = &effort - matched = true + candidateBase := modelName[:len(modelName)-len("-thinking")] + // Only strip the suffix if the original model is NOT a registered thinking model. + // This prevents stripping "-thinking" from models like "kimi-k2-thinking" where + // the suffix is part of the model's actual name. 
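+		// e.g., "kimi-k2-thinking" is returned unchanged, while a hypothetical unregistered
+		// "foo-thinking" still normalizes to base "foo" with a default effort of "medium".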
+ if !ModelSupportsThinking(modelName) { + baseModel = candidateBase + effort := "medium" + reasoningEffort = &effort + matched = true + } } } From f6300c72b790c6017a08ceacc425f9863907493d Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 16:21:50 +0800 Subject: [PATCH 10/15] fix(runtime): validate thinking config in iflow and qwen --- internal/runtime/executor/iflow_executor.go | 14 ++++++++++++-- internal/runtime/executor/qwen_executor.go | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index a445e47da..d1a69812d 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -58,9 +58,14 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -144,9 +149,14 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. 
toolsResult := gjson.GetBytes(body, "tools") if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index d25ed5da7..2b8d0e502 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -52,9 +52,14 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" @@ -127,9 +132,14 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. // This will have no real consequences. It's just to scare Qwen3. From 21bbceca0ce75e651f9dd0a29a681f2c580c661f Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 16:35:36 +0800 Subject: [PATCH 11/15] docs(runtime): document reasoning effort precedence --- internal/runtime/executor/payload_helpers.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 61486d62c..9c45681ac 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -46,7 +46,8 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str } // applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. -// Metadata values take precedence over any existing field when the model supports thinking. +// Metadata values take precedence over any existing field when the model supports thinking, intentionally +// overwriting caller-provided values to honor suffix/default metadata priority. 
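+// For example, a payload that already contains {"reasoning_effort":"low"} ends up with "high"
+// when the metadata derived from the requested model name specifies "high".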
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte { if len(metadata) == 0 { return payload From 6285459c08e9f6f5996374085053892d2d5b91fa Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 17:20:44 +0800 Subject: [PATCH 12/15] fix(runtime): unify claude thinking config resolution --- internal/runtime/executor/claude_executor.go | 56 +++----------------- internal/util/claude_thinking.go | 46 ++++++++++++++++ 2 files changed, 52 insertions(+), 50 deletions(-) create mode 100644 internal/util/claude_thinking.go diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index c7470954e..6af086080 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -450,59 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) { return betas, body } -// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes. +// injectThinkingConfig adds thinking configuration based on metadata using the unified flow. +// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata +// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini. func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte { - // Only inject if thinking config is not already present - if gjson.GetBytes(body, "thinking").Exists() { + budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata) + if !ok { return body } - - budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata) - if !ok || budgetTokens <= 0 { - return body - } - - body, _ = sjson.SetBytes(body, "thinking.type", "enabled") - body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens) - return body -} - -func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) { - budget, include, effort, matched := util.ThinkingFromMetadata(metadata) - if matched { - if include != nil && !*include { - return 0, false - } - if budget != nil { - normalized := util.NormalizeThinkingBudget(modelName, *budget) - if normalized > 0 { - return normalized, true - } - return 0, false - } - if effort != nil { - if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 { - return derived, true - } - } - } - return claudeBudgetFromSuffix(modelName) -} - -func claudeBudgetFromSuffix(modelName string) (int, bool) { - lower := strings.ToLower(strings.TrimSpace(modelName)) - switch { - case strings.HasSuffix(lower, "-thinking-low"): - return 1024, true - case strings.HasSuffix(lower, "-thinking-medium"): - return 8192, true - case strings.HasSuffix(lower, "-thinking-high"): - return 24576, true - case strings.HasSuffix(lower, "-thinking"): - return 8192, true - default: - return 0, false - } + return util.ApplyClaudeThinkingConfig(body, budget) } // ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled. diff --git a/internal/util/claude_thinking.go b/internal/util/claude_thinking.go new file mode 100644 index 000000000..b0c5a0a2f --- /dev/null +++ b/internal/util/claude_thinking.go @@ -0,0 +1,46 @@ +package util + +import ( + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload. 
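+// The body is expected to be a JSON-encoded Claude Messages API request.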
+// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
+// If budget is nil or non-positive, or the payload already has thinking config, it returns the
+// payload unchanged.
+func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
+	if budget == nil {
+		return body
+	}
+	if gjson.GetBytes(body, "thinking").Exists() {
+		return body
+	}
+	if *budget <= 0 {
+		return body
+	}
+	updated := body
+	updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
+	updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
+	return updated
+}
+
+// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
+// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
+// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
+func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
+	budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
+	if !matched {
+		return nil, false
+	}
+	if include != nil && !*include {
+		return nil, true
+	}
+	if budget == nil {
+		return nil, true
+	}
+	normalized := NormalizeThinkingBudget(modelName, *budget)
+	if normalized <= 0 {
+		return nil, true
+	}
+	return &normalized, true
+}
From facfe7c518cb528426dcb82c7f927e4f151bea33 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 18:17:28 +0800
Subject: [PATCH 13/15] refactor(thinking): use bracket tags for thinking meta

Align thinking suffix handling on a single bracket-style marker.
NormalizeThinkingModel strips a terminal `[value]` segment from model
identifiers and turns it into either a thinking budget (for numeric
values) or a reasoning effort hint (for strings). Emission of
`ThinkingIncludeThoughtsMetadataKey` is removed.

Executor helpers and the example config are updated so their comments
reference the new `[value]` suffix format instead of the legacy dash
variants.

BREAKING CHANGE: dash-based thinking suffixes (`-thinking`,
`-thinking-N`, `-reasoning`, `-nothinking`) are no longer parsed for
thinking metadata; only `[value]` annotations are recognized.
---
 config.example.yaml                          |   2 +-
 internal/runtime/executor/payload_helpers.go |   4 +-
 internal/util/thinking_suffix.go             | 122 ++++++-------------
 3 files changed, 41 insertions(+), 87 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index dfd7454bd..31f169737 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -100,7 +100,7 @@ ws-auth: false
 # excluded-models:
 #   - "claude-opus-4-5-20251101" # exclude specific models (exact match)
 #   - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
-#   - "*-think" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
+#   - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
 #   - "*haiku*" # wildcard matching substring (e.g.
claude-3-5-haiku-20241022)
 
 # OpenAI compatibility providers
diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go
index 9c45681ac..be2498685 100644
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -11,7 +11,7 @@ import (
 	"github.com/tidwall/sjson"
 )
 
-// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
+// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., [high], [8192])
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
 func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
@@ -28,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
 	return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 }
 
-// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
+// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., [high], [8192])
 // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
 func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go
index ef8302b0e..c9a68534e 100644
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -14,100 +14,57 @@ const (
 )
 
 // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
-// the normalized base model with extracted metadata. Supported patterns:
-// - "-thinking-<number>" extracts a numeric budget
-// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
-// - "-thinking" maps to a default reasoning effort of "medium"
-// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
-// - "-nothinking" maps to budget=0 and include_thoughts=false
+// the normalized base model with extracted metadata. Supported pattern:
+// - "[<value>]" where value can be:
+//   - A numeric budget (e.g., "[8192]", "[16384]")
+//   - A reasoning effort level (e.g., "[high]", "[medium]", "[low]")
+//
+// Examples:
+//   - "claude-sonnet-4-5-20250929[16384]" → budget=16384
+//   - "gpt-5.1[high]" → reasoning_effort="high"
+//   - "gemini-2.5-pro[32768]" → budget=32768
+//
+// Note: Empty brackets "[]" are not supported and will be ignored.
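+//
+// Effort values are lowercased and trimmed before being stored as metadata; numeric values
+// are parsed into a thinking budget via parseIntPrefix.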
func NormalizeThinkingModel(modelName string) (string, map[string]any) { if modelName == "" { return modelName, nil } - lower := strings.ToLower(modelName) baseModel := modelName var ( budgetOverride *int - includeThoughts *bool reasoningEffort *string matched bool ) - switch { - case strings.HasSuffix(lower, "-nothinking"): - baseModel = modelName[:len(modelName)-len("-nothinking")] - budget := 0 - include := false - budgetOverride = &budget - includeThoughts = &include - matched = true - case strings.HasSuffix(lower, "-reasoning"): - baseModel = modelName[:len(modelName)-len("-reasoning")] - budget := -1 - include := true - budgetOverride = &budget - includeThoughts = &include - matched = true - default: - if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 { - // Skip stripping if the original model is a registered thinking model. - // This prevents "-thinking-2507" in "qwen3-235b-a22b-thinking-2507" from being parsed. - if ModelSupportsThinking(modelName) { - break - } - value := modelName[idx+len("-thinking-"):] - if value != "" { - if parsed, ok := parseIntPrefix(value); ok { - candidateBase := modelName[:idx] - if ModelUsesThinkingLevels(candidateBase) { - baseModel = candidateBase - // Numeric suffix on level-aware models should still surface as reasoning effort metadata. - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - } - matched = true - } else { - baseModel = candidateBase - budgetOverride = &parsed - matched = true - } - } else { - baseModel = modelName[:idx] - if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok { - reasoningEffort = &normalized - matched = true - } else if !ModelUsesThinkingLevels(baseModel) { - // Keep unknown effort tokens so callers can honor user intent even without normalization. - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - matched = true - } else { - baseModel = modelName - } - } else { - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - matched = true - } else { - baseModel = modelName - } - } - } - } - } else if strings.HasSuffix(lower, "-thinking") { - candidateBase := modelName[:len(modelName)-len("-thinking")] - // Only strip the suffix if the original model is NOT a registered thinking model. - // This prevents stripping "-thinking" from models like "kimi-k2-thinking" where - // the suffix is part of the model's actual name. 
- if !ModelSupportsThinking(modelName) { - baseModel = candidateBase - effort := "medium" - reasoningEffort = &effort + // Match "[value]" pattern at the end of the model name + if idx := strings.LastIndex(modelName, "["); idx != -1 { + if !strings.HasSuffix(modelName, "]") { + // Incomplete bracket, ignore + return baseModel, nil + } + + value := modelName[idx+1 : len(modelName)-1] // Extract content between [ and ] + if value == "" { + // Empty brackets not supported + return baseModel, nil + } + + candidateBase := modelName[:idx] + + // Auto-detect: pure numeric → budget, string → reasoning effort level + if parsed, ok := parseIntPrefix(value); ok { + // Numeric value: treat as thinking budget + baseModel = candidateBase + budgetOverride = &parsed + matched = true + } else { + // String value: treat as reasoning effort level + baseModel = candidateBase + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw matched = true } } @@ -123,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { if budgetOverride != nil { metadata[ThinkingBudgetMetadataKey] = *budgetOverride } - if includeThoughts != nil { - metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts - } if reasoningEffort != nil { metadata[ReasoningEffortMetadataKey] = *reasoningEffort } From e79f65fd8efbac89e02e7d39d8bde89878178239 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 18:39:07 +0800 Subject: [PATCH 14/15] refactor(thinking): use parentheses for metadata suffix --- internal/runtime/executor/payload_helpers.go | 4 +-- internal/util/thinking_suffix.go | 26 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index be2498685..9bc82f1f3 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -11,7 +11,7 @@ import ( "github.com/tidwall/sjson" ) -// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., [high], [8192]) +// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192)) // for standard Gemini format payloads. It normalizes the budget when the model supports thinking. func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) @@ -28,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) } -// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., [high], [8192]) +// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192)) // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. 
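+// For example, metadata derived from "gemini-2.5-pro(32768)" applies a thinking budget of 32768
+// under the nested "request" object.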
 func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go
index c9a68534e..7851c580f 100644
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -15,16 +15,16 @@ const (
 
 // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
 // the normalized base model with extracted metadata. Supported pattern:
-// - "[<value>]" where value can be:
-//   - A numeric budget (e.g., "[8192]", "[16384]")
-//   - A reasoning effort level (e.g., "[high]", "[medium]", "[low]")
+// - "(<value>)" where value can be:
+//   - A numeric budget (e.g., "(8192)", "(16384)")
+//   - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
 //
 // Examples:
-//   - "claude-sonnet-4-5-20250929[16384]" → budget=16384
-//   - "gpt-5.1[high]" → reasoning_effort="high"
-//   - "gemini-2.5-pro[32768]" → budget=32768
+//   - "claude-sonnet-4-5-20250929(16384)" → budget=16384
+//   - "gpt-5.1(high)" → reasoning_effort="high"
+//   - "gemini-2.5-pro(32768)" → budget=32768
 //
-// Note: Empty brackets "[]" are not supported and will be ignored.
+// Note: Empty parentheses "()" are not supported and will be ignored.
 func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 	if modelName == "" {
 		return modelName, nil
@@ -38,16 +38,16 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 		matched         bool
 	)
 
-	// Match "[value]" pattern at the end of the model name
-	if idx := strings.LastIndex(modelName, "["); idx != -1 {
-		if !strings.HasSuffix(modelName, "]") {
-			// Incomplete bracket, ignore
+	// Match "(value)" pattern at the end of the model name
+	if idx := strings.LastIndex(modelName, "("); idx != -1 {
+		if !strings.HasSuffix(modelName, ")") {
+			// Incomplete parenthesis, ignore
 			return baseModel, nil
 		}
 
-		value := modelName[idx+1 : len(modelName)-1] // Extract content between [ and ]
+		value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
 		if value == "" {
-			// Empty brackets not supported
+			// Empty parentheses not supported
 			return baseModel, nil
 		}
 
From 88bdd25f06b773763a48f9f83f358d84eea57846 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 20:12:06 +0800
Subject: [PATCH 15/15] fix(amp): set status on claude stream errors

---
 sdk/api/handlers/claude/code_handlers.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sdk/api/handlers/claude/code_handlers.go b/sdk/api/handlers/claude/code_handlers.go
index 63ea6065e..8a57a0cc6 100644
--- a/sdk/api/handlers/claude/code_handlers.go
+++ b/sdk/api/handlers/claude/code_handlers.go
@@ -271,6 +271,11 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
 			continue
 		}
 		if errMsg != nil {
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
+			}
+			c.Status(status)
 			// An error occurred: emit as a proper SSE error event
 			errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
 			_, _ = writer.WriteString("event: error\n")
@@ -278,6 +283,7 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
 			_, _ = writer.Write(errorBytes)
 			_, _ = writer.WriteString("\n\n")
 			_ = writer.Flush()
+			flusher.Flush()
 		}
 		var execErr error
 		if errMsg != nil {