3 changes: 2 additions & 1 deletion src/locales/de/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription"
   },
   "cloud": {
-    "openai_gpt_5_2": "Neuestes Flaggschiff-Modell für Reasoning",
+    "openai_gpt_5_4": "Frontier-Modell für komplexes Reasoning",
+    "openai_gpt_5_2": "Starkes Reasoning-Modell",
     "openai_gpt_5_mini": "Schnell und kosteneffizient",
     "openai_gpt_5_nano": "Ultraschnell, niedrige Latenz",
     "openai_gpt_4_1": "Starke Basis, 1M Kontext",
3 changes: 2 additions & 1 deletion src/locales/en/translation.json
@@ -1434,7 +1434,8 @@
     "mistral_voxtral_mini_latest": "Fast multilingual transcription"
   },
   "cloud": {
-    "openai_gpt_5_2": "Latest flagship reasoning model",
+    "openai_gpt_5_4": "Frontier model for complex reasoning",
+    "openai_gpt_5_2": "Strong reasoning model",
     "openai_gpt_5_mini": "Fast and cost-efficient",
     "openai_gpt_5_nano": "Ultra-fast, low latency",
     "openai_gpt_4_1": "Strong baseline, 1M context",
3 changes: 2 additions & 1 deletion src/locales/es/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida"
   },
   "cloud": {
-    "openai_gpt_5_2": "Modelo insignia de razonamiento más reciente",
+    "openai_gpt_5_4": "Modelo frontier para razonamiento complejo",
+    "openai_gpt_5_2": "Modelo fuerte de razonamiento",
     "openai_gpt_5_mini": "Rápido y económico",
     "openai_gpt_5_nano": "Ultrarrápido, baja latencia",
     "openai_gpt_4_1": "Base sólida, contexto de 1M",
3 changes: 2 additions & 1 deletion src/locales/fr/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Transcription multilingue rapide"
   },
   "cloud": {
-    "openai_gpt_5_2": "Dernier modèle phare pour le raisonnement",
+    "openai_gpt_5_4": "Modèle frontier pour le raisonnement complexe",
+    "openai_gpt_5_2": "Modèle de raisonnement performant",
     "openai_gpt_5_mini": "Rapide et économique",
     "openai_gpt_5_nano": "Ultra-rapide, faible latence",
     "openai_gpt_4_1": "Base solide, contexte 1M",
3 changes: 2 additions & 1 deletion src/locales/it/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce"
   },
   "cloud": {
-    "openai_gpt_5_2": "Ultimo modello di punta per il ragionamento",
+    "openai_gpt_5_4": "Modello frontier per ragionamento complesso",
+    "openai_gpt_5_2": "Modello di ragionamento potente",
     "openai_gpt_5_mini": "Veloce ed efficiente nei costi",
     "openai_gpt_5_nano": "Ultra-veloce, bassa latenza",
     "openai_gpt_4_1": "Base solida, contesto da 1M",
3 changes: 2 additions & 1 deletion src/locales/ja/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "高速多言語文字起こし"
   },
   "cloud": {
-    "openai_gpt_5_2": "最新のフラッグシップ推論モデル",
+    "openai_gpt_5_4": "複雑な推論のためのフロンティアモデル",
+    "openai_gpt_5_2": "強力な推論モデル",
     "openai_gpt_5_mini": "高速で費用対効果が高い",
     "openai_gpt_5_nano": "超高速、低レイテンシー",
     "openai_gpt_4_1": "強力なベースライン、100 万トークンコンテキスト",
3 changes: 2 additions & 1 deletion src/locales/pt/translation.json
@@ -1336,7 +1336,8 @@
     "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida"
   },
   "cloud": {
-    "openai_gpt_5_2": "Modelo principal de raciocínio mais recente",
+    "openai_gpt_5_4": "Modelo frontier para raciocínio complexo",
+    "openai_gpt_5_2": "Modelo forte de raciocínio",
     "openai_gpt_5_mini": "Rápido e eficiente em custo",
     "openai_gpt_5_nano": "Ultrarápido, baixa latência",
     "openai_gpt_4_1": "Base sólida, contexto de 1M",
3 changes: 2 additions & 1 deletion src/locales/ru/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция"
   },
   "cloud": {
-    "openai_gpt_5_2": "Новейшая флагманская модель с рассуждением",
+    "openai_gpt_5_4": "Фронтирная модель для сложного рассуждения",
+    "openai_gpt_5_2": "Сильная модель рассуждения",
     "openai_gpt_5_mini": "Быстрая и экономичная",
     "openai_gpt_5_nano": "Сверхбыстрая, низкая задержка",
     "openai_gpt_4_1": "Надёжная базовая модель, контекст 1M",
3 changes: 2 additions & 1 deletion src/locales/zh-CN/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "快速多语言转录"
   },
   "cloud": {
-    "openai_gpt_5_2": "最新旗舰推理模型",
+    "openai_gpt_5_4": "复杂推理前沿模型",
+    "openai_gpt_5_2": "强大的推理模型",
     "openai_gpt_5_mini": "快速且经济",
     "openai_gpt_5_nano": "超快速、低延迟",
     "openai_gpt_4_1": "稳定基准,1M 上下文",
3 changes: 2 additions & 1 deletion src/locales/zh-TW/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "快速多語言轉錄"
   },
   "cloud": {
-    "openai_gpt_5_2": "最新旗艦推理模型",
+    "openai_gpt_5_4": "複雜推理前沿模型",
+    "openai_gpt_5_2": "強大的推理模型",
     "openai_gpt_5_mini": "快速且經濟實惠",
     "openai_gpt_5_nano": "超快速,低延遲",
     "openai_gpt_4_1": "穩健基礎,1M 上下文",
36 changes: 36 additions & 0 deletions src/models/ModelRegistry.ts
@@ -38,6 +38,8 @@ export interface CloudModelDefinition {
   description: string;
   descriptionKey?: string;
   disableThinking?: boolean;
+  tokenParam?: "max_tokens" | "max_completion_tokens";
+  supportsTemperature?: boolean;
 }
 
 export interface CloudProviderData {
@@ -327,6 +329,40 @@ export function getCloudModel(modelId: string): CloudModelDefinition | undefined
   return undefined;
 }
 
+export interface OpenAiApiConfig {
+  tokenParam: "max_tokens" | "max_completion_tokens";
+  supportsTemperature: boolean;
+}
+
+export function getOpenAiApiConfig(modelId: string): OpenAiApiConfig {
+  const model = getCloudModel(modelId);
+  if (model?.tokenParam) {
+    return {
+      tokenParam: model.tokenParam,
+      supportsTemperature: model.supportsTemperature ?? true,
+    };
+  }
+
+  // Fallback for models not in the registry (custom model IDs, etc.)
+  const isLegacy =
+    modelId.startsWith("gpt-3") ||
+    modelId.startsWith("gpt-4o") ||
+    modelId.startsWith("gpt-4-") ||
+    modelId === "gpt-4";
+
+  if (isLegacy) {
+    return { tokenParam: "max_tokens", supportsTemperature: true };
+  }
+
+  // gpt-4.1* supports temperature but uses max_completion_tokens
+  if (modelId.startsWith("gpt-4.1")) {
+    return { tokenParam: "max_completion_tokens", supportsTemperature: true };
+  }
+
+  // gpt-5* reasoning models: no temperature
+  return { tokenParam: "max_completion_tokens", supportsTemperature: false };
+}
+
 export function getParakeetModels(): ParakeetModelsMap {
   return modelData.parakeetModels;
 }
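For review convenience, a minimal usage sketch of the new helper; the dated and `-custom` model IDs below are hypothetical stand-ins for custom IDs that miss the registry and exercise the fallback branches:

```ts
import { getOpenAiApiConfig } from "../models/ModelRegistry";

// Registry hit: gpt-5.2 now carries explicit tokenParam/supportsTemperature.
getOpenAiApiConfig("gpt-5.2");
// => { tokenParam: "max_completion_tokens", supportsTemperature: false }

// Registry misses fall through to prefix matching:
getOpenAiApiConfig("gpt-4o-2024-08-06"); // legacy "gpt-4o" prefix
// => { tokenParam: "max_tokens", supportsTemperature: true }

getOpenAiApiConfig("gpt-4.1-2025-04-14"); // "gpt-4.1" prefix: new token param, temperature kept
// => { tokenParam: "max_completion_tokens", supportsTemperature: true }

getOpenAiApiConfig("gpt-5-custom"); // default branch: reasoning models, no temperature
// => { tokenParam: "max_completion_tokens", supportsTemperature: false }
```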
34 changes: 27 additions & 7 deletions src/models/modelRegistryData.json
@@ -166,41 +166,61 @@
       "id": "openai",
       "name": "OpenAI",
       "models": [
+        {
+          "id": "gpt-5.4",
+          "name": "GPT-5.4",
+          "description": "Frontier model for complex reasoning",
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_4",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": false
+        },
         {
           "id": "gpt-5.2",
           "name": "GPT-5.2",
-          "description": "Latest flagship reasoning model",
-          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2"
+          "description": "Strong reasoning model",
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": false
         },
         {
          "id": "gpt-5-mini",
          "name": "GPT-5 Mini",
          "description": "Fast and cost-efficient",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": false
        },
        {
          "id": "gpt-5-nano",
          "name": "GPT-5 Nano",
          "description": "Ultra-fast, low latency",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": false
        },
        {
          "id": "gpt-4.1",
          "name": "GPT-4.1",
          "description": "Strong baseline, 1M context",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": true
        },
        {
          "id": "gpt-4.1-mini",
          "name": "GPT-4.1 Mini",
          "description": "Smaller GPT-4.1 model",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": true
        },
        {
          "id": "gpt-4.1-nano",
          "name": "GPT-4.1 Nano",
          "description": "Lowest latency GPT-4.1",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": true
        }
       ]
     },
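Each entry above must typecheck against the extended `CloudModelDefinition`; assuming the interface's fields above the diffed hunk include `id` and `name` (as the JSON implies), one parsed entry looks like this sketch:

```ts
import type { CloudModelDefinition } from "../models/ModelRegistry";

// The new gpt-5.4 registry entry as a typed value. Both new fields are
// optional, so entries that omit them remain valid and simply fall back
// to getOpenAiApiConfig's prefix-based defaults.
const gpt54: CloudModelDefinition = {
  id: "gpt-5.4",
  name: "GPT-5.4",
  description: "Frontier model for complex reasoning",
  descriptionKey: "models.descriptions.cloud.openai_gpt_5_4",
  tokenParam: "max_completion_tokens",
  supportsTemperature: false,
};
```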
43 changes: 35 additions & 8 deletions src/services/ReasoningService.ts
@@ -1,4 +1,4 @@
-import { getModelProvider, getCloudModel } from "../models/ModelRegistry";
+import { getModelProvider, getCloudModel, getOpenAiApiConfig } from "../models/ModelRegistry";
 import { BaseReasoningService, ReasoningConfig } from "./BaseReasoningService";
 import { SecureCache } from "../utils/SecureCache";
 import { withRetry, createApiRetryStrategy } from "../utils/retry";
@@ -508,8 +508,6 @@ class ReasoningService extends BaseReasoningService {
       { role: "user", content: userPrompt },
     ];
 
-    const isOlderModel = model && (model.startsWith("gpt-4") || model.startsWith("gpt-3"));
-
     const openAiBase = this.getConfiguredOpenAIBase();
     const endpointCandidates = this.getOpenAIEndpointCandidates(openAiBase);
     const isCustomEndpoint = openAiBase !== API_ENDPOINTS.OPENAI_BASE;
@@ -538,16 +536,32 @@
       const controller = new AbortController();
       const timeoutId = setTimeout(() => controller.abort(), 30000);
       try {
+        const maxTokens =
+          config.maxTokens ||
+          Math.max(
+            4096,
+            this.calculateMaxTokens(
+              text.length,
+              TOKEN_LIMITS.MIN_TOKENS,
+              TOKEN_LIMITS.MAX_TOKENS,
+              TOKEN_LIMITS.TOKEN_MULTIPLIER
+            )
+          );
+
+        const apiConfig = getOpenAiApiConfig(model);
         const requestBody: any = { model };
 
         if (type === "responses") {
           requestBody.input = messages;
           requestBody.store = false;
+          requestBody.max_output_tokens = maxTokens;
         } else {
           requestBody.messages = messages;
-          if (isOlderModel) {
-            requestBody.temperature = config.temperature || 0.3;
-          }
+          requestBody[apiConfig.tokenParam] = maxTokens;
         }
 
+        if (apiConfig.supportsTemperature) {
+          requestBody.temperature = config.temperature || 0.3;
+        }
+
         const res = await fetch(endpoint, {
@@ -1124,14 +1138,27 @@ class ReasoningService extends BaseReasoningService {
       }
     }
 
+    const apiConfig = getOpenAiApiConfig(model);
+    const useOldTokenParam = isLocalProvider || provider === "groq";
+
     const requestBody: Record<string, unknown> = {
       model,
       messages,
       stream: true,
-      temperature: config.temperature ?? 0.3,
-      max_tokens: config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS),
     };
 
+    const maxTokens = config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS);
+
+    if (useOldTokenParam) {
+      requestBody.temperature = config.temperature ?? 0.3;
+      requestBody.max_tokens = maxTokens;
+    } else {
+      requestBody[apiConfig.tokenParam] = maxTokens;
+      if (apiConfig.supportsTemperature) {
+        requestBody.temperature = config.temperature ?? 0.3;
+      }
+    }
+
     logger.logReasoning("AGENT_STREAM_REQUEST", {
       endpoint,
       model,
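To make the streaming change concrete, a sketch of the two request-body shapes the new branch produces, assuming a GPT-5-family model on OpenAI versus a Groq/local provider (the Groq model ID and token counts are illustrative):

```ts
const messages = [{ role: "user" as const, content: "Summarize this transcript." }];

// OpenAI gpt-5.2 (supportsTemperature: false): temperature is omitted
// entirely and the cap is sent as max_completion_tokens.
const gpt5Body = {
  model: "gpt-5.2",
  messages,
  stream: true,
  max_completion_tokens: 4096,
};

// Local provider or Groq (useOldTokenParam === true): the pre-change
// behavior is preserved, so temperature and max_tokens are always set.
const groqBody = {
  model: "llama-3.3-70b-versatile", // illustrative
  messages,
  stream: true,
  temperature: 0.3,
  max_tokens: 4096,
};
```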