From faee01f26dbae68d2d49448083906b4576b2a97f Mon Sep 17 00:00:00 2001
From: Adrian Lumpe <git@alumpe.de>
Date: Sat, 14 Mar 2026 22:14:08 +0100
Subject: [PATCH 1/3] fix: use correct token param for newer OpenAI models

---
 src/services/ReasoningService.ts | 46 ++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/src/services/ReasoningService.ts b/src/services/ReasoningService.ts
index fcaa0c2d..e44d0430 100644
--- a/src/services/ReasoningService.ts
+++ b/src/services/ReasoningService.ts
@@ -508,8 +508,6 @@ class ReasoningService extends BaseReasoningService {
         { role: "user", content: userPrompt },
       ];
 
-      const isOlderModel = model && (model.startsWith("gpt-4") || model.startsWith("gpt-3"));
-
       const openAiBase = this.getConfiguredOpenAIBase();
       const endpointCandidates = this.getOpenAIEndpointCandidates(openAiBase);
       const isCustomEndpoint = openAiBase !== API_ENDPOINTS.OPENAI_BASE;
@@ -538,15 +536,31 @@ class ReasoningService extends BaseReasoningService {
           const controller = new AbortController();
           const timeoutId = setTimeout(() => controller.abort(), 30000);
           try {
+            const maxTokens =
+              config.maxTokens ||
+              Math.max(
+                4096,
+                this.calculateMaxTokens(
+                  text.length,
+                  TOKEN_LIMITS.MIN_TOKENS,
+                  TOKEN_LIMITS.MAX_TOKENS,
+                  TOKEN_LIMITS.TOKEN_MULTIPLIER
+                )
+              );
+
             const requestBody: any = { model };
 
             if (type === "responses") {
               requestBody.input = messages;
               requestBody.store = false;
+              requestBody.max_output_tokens = maxTokens;
             } else {
               requestBody.messages = messages;
-              if (isOlderModel) {
+              if (this.isOlderOpenAiModel(model)) {
                 requestBody.temperature = config.temperature || 0.3;
+                requestBody.max_tokens = maxTokens;
+              } else {
+                requestBody.max_completion_tokens = maxTokens;
               }
             }
 
@@ -1076,6 +1090,18 @@ class ReasoningService extends BaseReasoningService {
     }
   }
 
+  // Returns true for legacy OpenAI Chat Completions models that use max_tokens and temperature.
+  // Newer models (gpt-4.1+, gpt-5+) require max_completion_tokens and reject temperature.
+  private isOlderOpenAiModel(model: string): boolean {
+    if (!model) return false;
+    return (
+      model.startsWith("gpt-3") ||
+      model.startsWith("gpt-4o") ||
+      model.startsWith("gpt-4-") ||
+      model === "gpt-4"
+    );
+  }
+
   private getCustomPrompt(): string | undefined {
     try {
       const raw = localStorage.getItem("customUnifiedPrompt");
@@ -1124,14 +1150,24 @@ class ReasoningService extends BaseReasoningService {
       }
     }
 
+    const useOldTokenParam =
+      isLocalProvider || provider === "groq" || this.isOlderOpenAiModel(model);
+
     const requestBody: Record<string, unknown> = {
       model,
       messages,
       stream: true,
-      temperature: config.temperature ?? 0.3,
-      max_tokens: config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS),
     };
 
+    const maxTokens = config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS);
+    
+    if (useOldTokenParam) {
+      requestBody.temperature = config.temperature ?? 0.3;
+      requestBody.max_tokens = maxTokens;
+    } else {
+      requestBody.max_completion_tokens = maxTokens;
+    }
+
     logger.logReasoning("AGENT_STREAM_REQUEST", {
       endpoint,
       model,

From ee18a9823501380754737abaface03e2f4fcd63b Mon Sep 17 00:00:00 2001
From: Gabriel Stein <gabrielstein416@gmail.com>
Date: Sun, 15 Mar 2026 12:55:48 -0700
Subject: [PATCH 2/3] fix: use registry metadata for OpenAI API params, add
 GPT-5.4

- Add tokenParam and supportsTemperature fields to CloudModelDefinition
  and model registry, replacing fragile string prefix matching
- Add getOpenAiApiConfig() helper with fallback for unregistered models
- Fix temperature regression: gpt-4.1 models now correctly send
  temperature (they support it), gpt-5 models correctly omit it
- Add GPT-5.4 as new flagship model across all 10 locales
- Remove isOlderOpenAiModel() in favor of registry-driven lookup
---
 src/locales/de/translation.json    |  3 ++-
 src/locales/en/translation.json    |  3 ++-
 src/locales/es/translation.json    |  3 ++-
 src/locales/fr/translation.json    |  3 ++-
 src/locales/it/translation.json    |  3 ++-
 src/locales/ja/translation.json    |  3 ++-
 src/locales/pt/translation.json    |  3 ++-
 src/locales/ru/translation.json    |  3 ++-
 src/locales/zh-CN/translation.json |  3 ++-
 src/locales/zh-TW/translation.json |  3 ++-
 src/models/ModelRegistry.ts        | 36 ++++++++++++++++++++++++++++++
 src/models/modelRegistryData.json  | 34 ++++++++++++++++++++++------
 src/services/ReasoningService.ts   | 32 +++++++++-----------------
 13 files changed, 94 insertions(+), 38 deletions(-)

diff --git a/src/locales/de/translation.json b/src/locales/de/translation.json
index 5f4efca5..cd7ca322 100644
--- a/src/locales/de/translation.json
+++ b/src/locales/de/translation.json
@@ -1293,7 +1293,8 @@
         "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription"
       },
       "cloud": {
-        "openai_gpt_5_2": "Neuestes Flaggschiff-Modell für Reasoning",
+        "openai_gpt_5_4": "Frontier-Modell für komplexes Reasoning",
+        "openai_gpt_5_2": "Starkes Reasoning-Modell",
         "openai_gpt_5_mini": "Schnell und kosteneffizient",
         "openai_gpt_5_nano": "Ultraschnell, niedrige Latenz",
         "openai_gpt_4_1": "Starke Basis, 1M Kontext",
diff --git a/src/locales/en/translation.json b/src/locales/en/translation.json
index 78c977de..2ecfdad4 100644
--- a/src/locales/en/translation.json
+++ b/src/locales/en/translation.json
@@ -1369,7 +1369,8 @@
         "mistral_voxtral_mini_latest": "Fast multilingual transcription"
       },
       "cloud": {
-        "openai_gpt_5_2": "Latest flagship reasoning model",
+        "openai_gpt_5_4": "Frontier model for complex reasoning",
+        "openai_gpt_5_2": "Strong reasoning model",
         "openai_gpt_5_mini": "Fast and cost-efficient",
         "openai_gpt_5_nano": "Ultra-fast, low latency",
         "openai_gpt_4_1": "Strong baseline, 1M context",
diff --git a/src/locales/es/translation.json b/src/locales/es/translation.json
index 55785ec4..1a50b6b3 100644
--- a/src/locales/es/translation.json
+++ b/src/locales/es/translation.json
@@ -1297,7 +1297,8 @@
         "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida"
       },
       "cloud": {
-        "openai_gpt_5_2": "Modelo insignia de razonamiento más reciente",
+        "openai_gpt_5_4": "Modelo frontier para razonamiento complejo",
+        "openai_gpt_5_2": "Modelo fuerte de razonamiento",
         "openai_gpt_5_mini": "Rápido y económico",
         "openai_gpt_5_nano": "Ultrarrápido, baja latencia",
         "openai_gpt_4_1": "Base sólida, contexto de 1M",
diff --git a/src/locales/fr/translation.json b/src/locales/fr/translation.json
index 1e5617ab..3c6721aa 100644
--- a/src/locales/fr/translation.json
+++ b/src/locales/fr/translation.json
@@ -1297,7 +1297,8 @@
         "mistral_voxtral_mini_latest": "Transcription multilingue rapide"
       },
       "cloud": {
-        "openai_gpt_5_2": "Dernier modèle phare pour le raisonnement",
+        "openai_gpt_5_4": "Modèle frontier pour le raisonnement complexe",
+        "openai_gpt_5_2": "Modèle de raisonnement performant",
         "openai_gpt_5_mini": "Rapide et économique",
         "openai_gpt_5_nano": "Ultra-rapide, faible latence",
         "openai_gpt_4_1": "Base solide, contexte 1M",
diff --git a/src/locales/it/translation.json b/src/locales/it/translation.json
index b748525c..19e50e81 100644
--- a/src/locales/it/translation.json
+++ b/src/locales/it/translation.json
@@ -1297,7 +1297,8 @@
         "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce"
       },
       "cloud": {
-        "openai_gpt_5_2": "Ultimo modello di punta per il ragionamento",
+        "openai_gpt_5_4": "Modello frontier per ragionamento complesso",
+        "openai_gpt_5_2": "Modello di ragionamento potente",
         "openai_gpt_5_mini": "Veloce ed efficiente nei costi",
         "openai_gpt_5_nano": "Ultra-veloce, bassa latenza",
         "openai_gpt_4_1": "Base solida, contesto da 1M",
diff --git a/src/locales/ja/translation.json b/src/locales/ja/translation.json
index 9bdad109..2c47804b 100644
--- a/src/locales/ja/translation.json
+++ b/src/locales/ja/translation.json
@@ -1293,7 +1293,8 @@
         "mistral_voxtral_mini_latest": "高速多言語文字起こし"
       },
       "cloud": {
-        "openai_gpt_5_2": "最新のフラッグシップ推論モデル",
+        "openai_gpt_5_4": "複雑な推論のためのフロンティアモデル",
+        "openai_gpt_5_2": "強力な推論モデル",
         "openai_gpt_5_mini": "高速で費用対効果が高い",
         "openai_gpt_5_nano": "超高速、低レイテンシー",
         "openai_gpt_4_1": "強力なベースライン、100 万トークンコンテキスト",
diff --git a/src/locales/pt/translation.json b/src/locales/pt/translation.json
index c3ca9aab..e541eeda 100644
--- a/src/locales/pt/translation.json
+++ b/src/locales/pt/translation.json
@@ -1269,7 +1269,8 @@
         "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida"
       },
       "cloud": {
-        "openai_gpt_5_2": "Modelo principal de raciocínio mais recente",
+        "openai_gpt_5_4": "Modelo frontier para raciocínio complexo",
+        "openai_gpt_5_2": "Modelo forte de raciocínio",
         "openai_gpt_5_mini": "Rápido e eficiente em custo",
         "openai_gpt_5_nano": "Ultrarápido, baixa latência",
         "openai_gpt_4_1": "Base sólida, contexto de 1M",
diff --git a/src/locales/ru/translation.json b/src/locales/ru/translation.json
index 6d209898..a9a5dc20 100644
--- a/src/locales/ru/translation.json
+++ b/src/locales/ru/translation.json
@@ -1297,7 +1297,8 @@
         "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция"
       },
       "cloud": {
-        "openai_gpt_5_2": "Новейшая флагманская модель с рассуждением",
+        "openai_gpt_5_4": "Фронтирная модель для сложного рассуждения",
+        "openai_gpt_5_2": "Сильная модель рассуждения",
         "openai_gpt_5_mini": "Быстрая и экономичная",
         "openai_gpt_5_nano": "Сверхбыстрая, низкая задержка",
         "openai_gpt_4_1": "Надёжная базовая модель, контекст 1M",
diff --git a/src/locales/zh-CN/translation.json b/src/locales/zh-CN/translation.json
index ba3b7046..8e950558 100644
--- a/src/locales/zh-CN/translation.json
+++ b/src/locales/zh-CN/translation.json
@@ -1293,7 +1293,8 @@
         "mistral_voxtral_mini_latest": "快速多语言转录"
       },
       "cloud": {
-        "openai_gpt_5_2": "最新旗舰推理模型",
+        "openai_gpt_5_4": "复杂推理前沿模型",
+        "openai_gpt_5_2": "强大的推理模型",
         "openai_gpt_5_mini": "快速且经济",
         "openai_gpt_5_nano": "超快速、低延迟",
         "openai_gpt_4_1": "稳定基准，1M 上下文",
diff --git a/src/locales/zh-TW/translation.json b/src/locales/zh-TW/translation.json
index b5c023d6..69cf8d62 100644
--- a/src/locales/zh-TW/translation.json
+++ b/src/locales/zh-TW/translation.json
@@ -1293,7 +1293,8 @@
         "mistral_voxtral_mini_latest": "快速多語言轉錄"
       },
       "cloud": {
-        "openai_gpt_5_2": "最新旗艦推理模型",
+        "openai_gpt_5_4": "複雜推理前沿模型",
+        "openai_gpt_5_2": "強大的推理模型",
         "openai_gpt_5_mini": "快速且經濟實惠",
         "openai_gpt_5_nano": "超快速，低延遲",
         "openai_gpt_4_1": "穩健基礎，1M 上下文",
diff --git a/src/models/ModelRegistry.ts b/src/models/ModelRegistry.ts
index 7cd5b8a9..6b7ca7c0 100644
--- a/src/models/ModelRegistry.ts
+++ b/src/models/ModelRegistry.ts
@@ -38,6 +38,8 @@ export interface CloudModelDefinition {
   description: string;
   descriptionKey?: string;
   disableThinking?: boolean;
+  tokenParam?: "max_tokens" | "max_completion_tokens";
+  supportsTemperature?: boolean;
 }
 
 export interface CloudProviderData {
@@ -327,6 +329,40 @@ export function getCloudModel(modelId: string): CloudModelDefinition | undefined
   return undefined;
 }
 
+export interface OpenAiApiConfig {
+  tokenParam: "max_tokens" | "max_completion_tokens"; // request-body key that carries the token limit
+  supportsTemperature: boolean; // when false, callers leave `temperature` out of the request body
+}
+
+/**
+ * Resolves the token-limit parameter and temperature support for an OpenAI model ID:
+ * registry metadata first, then prefix heuristics. A missing ID gets the final default.
+ */
+export function getOpenAiApiConfig(modelId: string): OpenAiApiConfig {
+  const model = getCloudModel(modelId);
+  if (model?.tokenParam) {
+    return {
+      tokenParam: model.tokenParam,
+      supportsTemperature: model.supportsTemperature ?? true,
+    };
+  }
+
+  // Fallback for models not in the registry; tolerate an unset ID from untyped callers.
+  const id = typeof modelId === "string" ? modelId : "";
+  const legacyPrefixes = ["gpt-3", "gpt-4o", "gpt-4-"];
+  if (id === "gpt-4" || legacyPrefixes.some((prefix) => id.startsWith(prefix))) {
+    return { tokenParam: "max_tokens", supportsTemperature: true };
+  }
+
+  // gpt-4.1* supports temperature but uses max_completion_tokens
+  if (id.startsWith("gpt-4.1")) {
+    return { tokenParam: "max_completion_tokens", supportsTemperature: true };
+  }
+
+  // Everything else is treated like the gpt-5* reasoning models: no temperature.
+  return { tokenParam: "max_completion_tokens", supportsTemperature: false };
+}
+
 export function getParakeetModels(): ParakeetModelsMap {
   return modelData.parakeetModels;
 }
diff --git a/src/models/modelRegistryData.json b/src/models/modelRegistryData.json
index 96fc97b1..50b5cf58 100644
--- a/src/models/modelRegistryData.json
+++ b/src/models/modelRegistryData.json
@@ -166,41 +166,61 @@
       "id": "openai",
       "name": "OpenAI",
       "models": [
+        {
+          "id": "gpt-5.4",
+          "name": "GPT-5.4",
+          "description": "Frontier model for complex reasoning",
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_4",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": false
+        },
         {
           "id": "gpt-5.2",
           "name": "GPT-5.2",
-          "description": "Latest flagship reasoning model",
-          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2"
+          "description": "Strong reasoning model",
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": false
         },
         {
           "id": "gpt-5-mini",
           "name": "GPT-5 Mini",
           "description": "Fast and cost-efficient",
-          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini"
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": false
         },
         {
           "id": "gpt-5-nano",
           "name": "GPT-5 Nano",
           "description": "Ultra-fast, low latency",
-          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano"
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": false
         },
         {
           "id": "gpt-4.1",
           "name": "GPT-4.1",
           "description": "Strong baseline, 1M context",
-          "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1"
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": true
         },
         {
           "id": "gpt-4.1-mini",
           "name": "GPT-4.1 Mini",
           "description": "Smaller GPT-4.1 model",
-          "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini"
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": true
         },
         {
           "id": "gpt-4.1-nano",
           "name": "GPT-4.1 Nano",
           "description": "Lowest latency GPT-4.1",
-          "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano"
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": true
         }
       ]
     },
diff --git a/src/services/ReasoningService.ts b/src/services/ReasoningService.ts
index e44d0430..633efc3e 100644
--- a/src/services/ReasoningService.ts
+++ b/src/services/ReasoningService.ts
@@ -1,4 +1,4 @@
-import { getModelProvider, getCloudModel } from "../models/ModelRegistry";
+import { getModelProvider, getCloudModel, getOpenAiApiConfig } from "../models/ModelRegistry";
 import { BaseReasoningService, ReasoningConfig } from "./BaseReasoningService";
 import { SecureCache } from "../utils/SecureCache";
 import { withRetry, createApiRetryStrategy } from "../utils/retry";
@@ -548,6 +548,7 @@ class ReasoningService extends BaseReasoningService {
                 )
               );
 
+            const apiConfig = getOpenAiApiConfig(model);
             const requestBody: any = { model };
 
             if (type === "responses") {
@@ -556,11 +557,9 @@ class ReasoningService extends BaseReasoningService {
               requestBody.max_output_tokens = maxTokens;
             } else {
               requestBody.messages = messages;
-              if (this.isOlderOpenAiModel(model)) {
+              requestBody[apiConfig.tokenParam] = maxTokens;
+              if (apiConfig.supportsTemperature) {
                 requestBody.temperature = config.temperature || 0.3;
-                requestBody.max_tokens = maxTokens;
-              } else {
-                requestBody.max_completion_tokens = maxTokens;
               }
             }
 
@@ -1090,18 +1089,6 @@ class ReasoningService extends BaseReasoningService {
     }
   }
 
-  // Returns true for legacy OpenAI Chat Completions models that use max_tokens and temperature.
-  // Newer models (gpt-4.1+, gpt-5+) require max_completion_tokens and reject temperature.
-  private isOlderOpenAiModel(model: string): boolean {
-    if (!model) return false;
-    return (
-      model.startsWith("gpt-3") ||
-      model.startsWith("gpt-4o") ||
-      model.startsWith("gpt-4-") ||
-      model === "gpt-4"
-    );
-  }
-
   private getCustomPrompt(): string | undefined {
     try {
       const raw = localStorage.getItem("customUnifiedPrompt");
@@ -1150,8 +1137,8 @@ class ReasoningService extends BaseReasoningService {
       }
     }
 
-    const useOldTokenParam =
-      isLocalProvider || provider === "groq" || this.isOlderOpenAiModel(model);
+    const apiConfig = getOpenAiApiConfig(model);
+    const useOldTokenParam = isLocalProvider || provider === "groq";
 
     const requestBody: Record<string, unknown> = {
       model,
@@ -1160,12 +1147,15 @@ class ReasoningService extends BaseReasoningService {
     };
 
     const maxTokens = config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS);
-    
+
     if (useOldTokenParam) {
       requestBody.temperature = config.temperature ?? 0.3;
       requestBody.max_tokens = maxTokens;
     } else {
-      requestBody.max_completion_tokens = maxTokens;
+      requestBody[apiConfig.tokenParam] = maxTokens;
+      if (apiConfig.supportsTemperature) {
+        requestBody.temperature = config.temperature ?? 0.3;
+      }
     }
 
     logger.logReasoning("AGENT_STREAM_REQUEST", {

From eab0a6107b540be829d20d62681e5c03b12ef08d Mon Sep 17 00:00:00 2001
From: Gabriel Stein <gabrielstein416@gmail.com>
Date: Sun, 15 Mar 2026 12:59:05 -0700
Subject: [PATCH 3/3] fix: send temperature on Responses API for models that
 support it

gpt-4.1 models support temperature on both the Responses API and
Chat Completions endpoints. Move the temperature check outside the
if/else branch so it applies to both API paths.
---
 src/services/ReasoningService.ts | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/services/ReasoningService.ts b/src/services/ReasoningService.ts
index 987d1aae..f822d99e 100644
--- a/src/services/ReasoningService.ts
+++ b/src/services/ReasoningService.ts
@@ -558,9 +558,10 @@ class ReasoningService extends BaseReasoningService {
             } else {
               requestBody.messages = messages;
               requestBody[apiConfig.tokenParam] = maxTokens;
-              if (apiConfig.supportsTemperature) {
-                requestBody.temperature = config.temperature || 0.3;
-              }
+            }
+
+            if (apiConfig.supportsTemperature) {
+              requestBody.temperature = config.temperature ?? 0.3; // ?? preserves an explicit 0
+            }
 
             const res = await fetch(endpoint, {