From faee01f26dbae68d2d49448083906b4576b2a97f Mon Sep 17 00:00:00 2001 From: Adrian Lumpe Date: Sat, 14 Mar 2026 22:14:08 +0100 Subject: [PATCH 1/3] fix: use correct token param for newer OpenAI models --- src/services/ReasoningService.ts | 46 ++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/src/services/ReasoningService.ts b/src/services/ReasoningService.ts index fcaa0c2d..e44d0430 100644 --- a/src/services/ReasoningService.ts +++ b/src/services/ReasoningService.ts @@ -508,8 +508,6 @@ class ReasoningService extends BaseReasoningService { { role: "user", content: userPrompt }, ]; - const isOlderModel = model && (model.startsWith("gpt-4") || model.startsWith("gpt-3")); - const openAiBase = this.getConfiguredOpenAIBase(); const endpointCandidates = this.getOpenAIEndpointCandidates(openAiBase); const isCustomEndpoint = openAiBase !== API_ENDPOINTS.OPENAI_BASE; @@ -538,15 +536,31 @@ class ReasoningService extends BaseReasoningService { const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 30000); try { + const maxTokens = + config.maxTokens || + Math.max( + 4096, + this.calculateMaxTokens( + text.length, + TOKEN_LIMITS.MIN_TOKENS, + TOKEN_LIMITS.MAX_TOKENS, + TOKEN_LIMITS.TOKEN_MULTIPLIER + ) + ); + const requestBody: any = { model }; if (type === "responses") { requestBody.input = messages; requestBody.store = false; + requestBody.max_output_tokens = maxTokens; } else { requestBody.messages = messages; - if (isOlderModel) { + if (this.isOlderOpenAiModel(model)) { requestBody.temperature = config.temperature || 0.3; + requestBody.max_tokens = maxTokens; + } else { + requestBody.max_completion_tokens = maxTokens; } } @@ -1076,6 +1090,18 @@ class ReasoningService extends BaseReasoningService { } } + // Returns true for legacy OpenAI Chat Completions models that use max_tokens and temperature. + // Newer models (gpt-4.1+, gpt-5+) require max_completion_tokens and reject temperature. + private isOlderOpenAiModel(model: string): boolean { + if (!model) return false; + return ( + model.startsWith("gpt-3") || + model.startsWith("gpt-4o") || + model.startsWith("gpt-4-") || + model === "gpt-4" + ); + } + private getCustomPrompt(): string | undefined { try { const raw = localStorage.getItem("customUnifiedPrompt"); @@ -1124,14 +1150,24 @@ class ReasoningService extends BaseReasoningService { } } + const useOldTokenParam = + isLocalProvider || provider === "groq" || this.isOlderOpenAiModel(model); + const requestBody: Record = { model, messages, stream: true, - temperature: config.temperature ?? 0.3, - max_tokens: config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS), }; + const maxTokens = config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS); + + if (useOldTokenParam) { + requestBody.temperature = config.temperature ?? 0.3; + requestBody.max_tokens = maxTokens; + } else { + requestBody.max_completion_tokens = maxTokens; + } + logger.logReasoning("AGENT_STREAM_REQUEST", { endpoint, model, From ee18a9823501380754737abaface03e2f4fcd63b Mon Sep 17 00:00:00 2001 From: Gabriel Stein Date: Sun, 15 Mar 2026 12:55:48 -0700 Subject: [PATCH 2/3] fix: use registry metadata for OpenAI API params, add GPT-5.4 - Add tokenParam and supportsTemperature fields to CloudModelDefinition and model registry, replacing fragile string prefix matching - Add getOpenAiApiConfig() helper with fallback for unregistered models - Fix temperature regression: gpt-4.1 models now correctly send temperature (they support it), gpt-5 models correctly omit it - Add GPT-5.4 as new flagship model across all 10 locales - Remove isOlderOpenAiModel() in favor of registry-driven lookup --- src/locales/de/translation.json | 3 ++- src/locales/en/translation.json | 3 ++- src/locales/es/translation.json | 3 ++- src/locales/fr/translation.json | 3 ++- src/locales/it/translation.json | 3 ++- src/locales/ja/translation.json | 3 ++- src/locales/pt/translation.json | 3 ++- src/locales/ru/translation.json | 3 ++- src/locales/zh-CN/translation.json | 3 ++- src/locales/zh-TW/translation.json | 3 ++- src/models/ModelRegistry.ts | 36 ++++++++++++++++++++++++++++++ src/models/modelRegistryData.json | 34 ++++++++++++++++++++++------ src/services/ReasoningService.ts | 32 +++++++++----------------- 13 files changed, 94 insertions(+), 38 deletions(-) diff --git a/src/locales/de/translation.json b/src/locales/de/translation.json index 5f4efca5..cd7ca322 100644 --- a/src/locales/de/translation.json +++ b/src/locales/de/translation.json @@ -1293,7 +1293,8 @@ "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription" }, "cloud": { - "openai_gpt_5_2": "Neuestes Flaggschiff-Modell für Reasoning", + "openai_gpt_5_4": "Frontier-Modell für komplexes Reasoning", + "openai_gpt_5_2": "Starkes Reasoning-Modell", "openai_gpt_5_mini": "Schnell und kosteneffizient", "openai_gpt_5_nano": "Ultraschnell, niedrige Latenz", "openai_gpt_4_1": "Starke Basis, 1M Kontext", diff --git a/src/locales/en/translation.json b/src/locales/en/translation.json index 78c977de..2ecfdad4 100644 --- a/src/locales/en/translation.json +++ b/src/locales/en/translation.json @@ -1369,7 +1369,8 @@ "mistral_voxtral_mini_latest": "Fast multilingual transcription" }, "cloud": { - "openai_gpt_5_2": "Latest flagship reasoning model", + "openai_gpt_5_4": "Frontier model for complex reasoning", + "openai_gpt_5_2": "Strong reasoning model", "openai_gpt_5_mini": "Fast and cost-efficient", "openai_gpt_5_nano": "Ultra-fast, low latency", "openai_gpt_4_1": "Strong baseline, 1M context", diff --git a/src/locales/es/translation.json b/src/locales/es/translation.json index 55785ec4..1a50b6b3 100644 --- a/src/locales/es/translation.json +++ b/src/locales/es/translation.json @@ -1297,7 +1297,8 @@ "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida" }, "cloud": { - "openai_gpt_5_2": "Modelo insignia de razonamiento más reciente", + "openai_gpt_5_4": "Modelo frontier para razonamiento complejo", + "openai_gpt_5_2": "Modelo fuerte de razonamiento", "openai_gpt_5_mini": "Rápido y económico", "openai_gpt_5_nano": "Ultrarrápido, baja latencia", "openai_gpt_4_1": "Base sólida, contexto de 1M", diff --git a/src/locales/fr/translation.json b/src/locales/fr/translation.json index 1e5617ab..3c6721aa 100644 --- a/src/locales/fr/translation.json +++ b/src/locales/fr/translation.json @@ -1297,7 +1297,8 @@ "mistral_voxtral_mini_latest": "Transcription multilingue rapide" }, "cloud": { - "openai_gpt_5_2": "Dernier modèle phare pour le raisonnement", + "openai_gpt_5_4": "Modèle frontier pour le raisonnement complexe", + "openai_gpt_5_2": "Modèle de raisonnement performant", "openai_gpt_5_mini": "Rapide et économique", "openai_gpt_5_nano": "Ultra-rapide, faible latence", "openai_gpt_4_1": "Base solide, contexte 1M", diff --git a/src/locales/it/translation.json b/src/locales/it/translation.json index b748525c..19e50e81 100644 --- a/src/locales/it/translation.json +++ b/src/locales/it/translation.json @@ -1297,7 +1297,8 @@ "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce" }, "cloud": { - "openai_gpt_5_2": "Ultimo modello di punta per il ragionamento", + "openai_gpt_5_4": "Modello frontier per ragionamento complesso", + "openai_gpt_5_2": "Modello di ragionamento potente", "openai_gpt_5_mini": "Veloce ed efficiente nei costi", "openai_gpt_5_nano": "Ultra-veloce, bassa latenza", "openai_gpt_4_1": "Base solida, contesto da 1M", diff --git a/src/locales/ja/translation.json b/src/locales/ja/translation.json index 9bdad109..2c47804b 100644 --- a/src/locales/ja/translation.json +++ b/src/locales/ja/translation.json @@ -1293,7 +1293,8 @@ "mistral_voxtral_mini_latest": "高速多言語文字起こし" }, "cloud": { - "openai_gpt_5_2": "最新のフラッグシップ推論モデル", + "openai_gpt_5_4": "複雑な推論のためのフロンティアモデル", + "openai_gpt_5_2": "強力な推論モデル", "openai_gpt_5_mini": "高速で費用対効果が高い", "openai_gpt_5_nano": "超高速、低レイテンシー", "openai_gpt_4_1": "強力なベースライン、100 万トークンコンテキスト", diff --git a/src/locales/pt/translation.json b/src/locales/pt/translation.json index c3ca9aab..e541eeda 100644 --- a/src/locales/pt/translation.json +++ b/src/locales/pt/translation.json @@ -1269,7 +1269,8 @@ "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida" }, "cloud": { - "openai_gpt_5_2": "Modelo principal de raciocínio mais recente", + "openai_gpt_5_4": "Modelo frontier para raciocínio complexo", + "openai_gpt_5_2": "Modelo forte de raciocínio", "openai_gpt_5_mini": "Rápido e eficiente em custo", "openai_gpt_5_nano": "Ultrarápido, baixa latência", "openai_gpt_4_1": "Base sólida, contexto de 1M", diff --git a/src/locales/ru/translation.json b/src/locales/ru/translation.json index 6d209898..a9a5dc20 100644 --- a/src/locales/ru/translation.json +++ b/src/locales/ru/translation.json @@ -1297,7 +1297,8 @@ "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция" }, "cloud": { - "openai_gpt_5_2": "Новейшая флагманская модель с рассуждением", + "openai_gpt_5_4": "Фронтирная модель для сложного рассуждения", + "openai_gpt_5_2": "Сильная модель рассуждения", "openai_gpt_5_mini": "Быстрая и экономичная", "openai_gpt_5_nano": "Сверхбыстрая, низкая задержка", "openai_gpt_4_1": "Надёжная базовая модель, контекст 1M", diff --git a/src/locales/zh-CN/translation.json b/src/locales/zh-CN/translation.json index ba3b7046..8e950558 100644 --- a/src/locales/zh-CN/translation.json +++ b/src/locales/zh-CN/translation.json @@ -1293,7 +1293,8 @@ "mistral_voxtral_mini_latest": "快速多语言转录" }, "cloud": { - "openai_gpt_5_2": "最新旗舰推理模型", + "openai_gpt_5_4": "复杂推理前沿模型", + "openai_gpt_5_2": "强大的推理模型", "openai_gpt_5_mini": "快速且经济", "openai_gpt_5_nano": "超快速、低延迟", "openai_gpt_4_1": "稳定基准,1M 上下文", diff --git a/src/locales/zh-TW/translation.json b/src/locales/zh-TW/translation.json index b5c023d6..69cf8d62 100644 --- a/src/locales/zh-TW/translation.json +++ b/src/locales/zh-TW/translation.json @@ -1293,7 +1293,8 @@ "mistral_voxtral_mini_latest": "快速多語言轉錄" }, "cloud": { - "openai_gpt_5_2": "最新旗艦推理模型", + "openai_gpt_5_4": "複雜推理前沿模型", + "openai_gpt_5_2": "強大的推理模型", "openai_gpt_5_mini": "快速且經濟實惠", "openai_gpt_5_nano": "超快速,低延遲", "openai_gpt_4_1": "穩健基礎,1M 上下文", diff --git a/src/models/ModelRegistry.ts b/src/models/ModelRegistry.ts index 7cd5b8a9..6b7ca7c0 100644 --- a/src/models/ModelRegistry.ts +++ b/src/models/ModelRegistry.ts @@ -38,6 +38,8 @@ export interface CloudModelDefinition { description: string; descriptionKey?: string; disableThinking?: boolean; + tokenParam?: "max_tokens" | "max_completion_tokens"; + supportsTemperature?: boolean; } export interface CloudProviderData { @@ -327,6 +329,40 @@ export function getCloudModel(modelId: string): CloudModelDefinition | undefined return undefined; } +export interface OpenAiApiConfig { + tokenParam: "max_tokens" | "max_completion_tokens"; + supportsTemperature: boolean; +} + +export function getOpenAiApiConfig(modelId: string): OpenAiApiConfig { + const model = getCloudModel(modelId); + if (model?.tokenParam) { + return { + tokenParam: model.tokenParam, + supportsTemperature: model.supportsTemperature ?? true, + }; + } + + // Fallback for models not in the registry (custom model IDs, etc.) + const isLegacy = + modelId.startsWith("gpt-3") || + modelId.startsWith("gpt-4o") || + modelId.startsWith("gpt-4-") || + modelId === "gpt-4"; + + if (isLegacy) { + return { tokenParam: "max_tokens", supportsTemperature: true }; + } + + // gpt-4.1* supports temperature but uses max_completion_tokens + if (modelId.startsWith("gpt-4.1")) { + return { tokenParam: "max_completion_tokens", supportsTemperature: true }; + } + + // gpt-5* reasoning models: no temperature + return { tokenParam: "max_completion_tokens", supportsTemperature: false }; +} + export function getParakeetModels(): ParakeetModelsMap { return modelData.parakeetModels; } diff --git a/src/models/modelRegistryData.json b/src/models/modelRegistryData.json index 96fc97b1..50b5cf58 100644 --- a/src/models/modelRegistryData.json +++ b/src/models/modelRegistryData.json @@ -166,41 +166,61 @@ "id": "openai", "name": "OpenAI", "models": [ + { + "id": "gpt-5.4", + "name": "GPT-5.4", + "description": "Frontier model for complex reasoning", + "descriptionKey": "models.descriptions.cloud.openai_gpt_5_4", + "tokenParam": "max_completion_tokens", + "supportsTemperature": false + }, { "id": "gpt-5.2", "name": "GPT-5.2", - "description": "Latest flagship reasoning model", - "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2" + "description": "Strong reasoning model", + "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2", + "tokenParam": "max_completion_tokens", + "supportsTemperature": false }, { "id": "gpt-5-mini", "name": "GPT-5 Mini", "description": "Fast and cost-efficient", - "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini" + "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini", + "tokenParam": "max_completion_tokens", + "supportsTemperature": false }, { "id": "gpt-5-nano", "name": "GPT-5 Nano", "description": "Ultra-fast, low latency", - "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano" + "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano", + "tokenParam": "max_completion_tokens", + "supportsTemperature": false }, { "id": "gpt-4.1", "name": "GPT-4.1", "description": "Strong baseline, 1M context", - "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1" + "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1", + "tokenParam": "max_completion_tokens", + "supportsTemperature": true }, { "id": "gpt-4.1-mini", "name": "GPT-4.1 Mini", "description": "Smaller GPT-4.1 model", - "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini" + "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini", + "tokenParam": "max_completion_tokens", + "supportsTemperature": true }, { "id": "gpt-4.1-nano", "name": "GPT-4.1 Nano", "description": "Lowest latency GPT-4.1", - "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano" + "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano", + "tokenParam": "max_completion_tokens", + "supportsTemperature": true } ] }, diff --git a/src/services/ReasoningService.ts b/src/services/ReasoningService.ts index e44d0430..633efc3e 100644 --- a/src/services/ReasoningService.ts +++ b/src/services/ReasoningService.ts @@ -1,4 +1,4 @@ -import { getModelProvider, getCloudModel } from "../models/ModelRegistry"; +import { getModelProvider, getCloudModel, getOpenAiApiConfig } from "../models/ModelRegistry"; import { BaseReasoningService, ReasoningConfig } from "./BaseReasoningService"; import { SecureCache } from "../utils/SecureCache"; import { withRetry, createApiRetryStrategy } from "../utils/retry"; @@ -548,6 +548,7 @@ class ReasoningService extends BaseReasoningService { ) ); + const apiConfig = getOpenAiApiConfig(model); const requestBody: any = { model }; if (type === "responses") { @@ -556,11 +557,9 @@ class ReasoningService extends BaseReasoningService { requestBody.max_output_tokens = maxTokens; } else { requestBody.messages = messages; - if (this.isOlderOpenAiModel(model)) { + requestBody[apiConfig.tokenParam] = maxTokens; + if (apiConfig.supportsTemperature) { requestBody.temperature = config.temperature || 0.3; - requestBody.max_tokens = maxTokens; - } else { - requestBody.max_completion_tokens = maxTokens; } } @@ -1090,18 +1089,6 @@ class ReasoningService extends BaseReasoningService { } } - // Returns true for legacy OpenAI Chat Completions models that use max_tokens and temperature. - // Newer models (gpt-4.1+, gpt-5+) require max_completion_tokens and reject temperature. - private isOlderOpenAiModel(model: string): boolean { - if (!model) return false; - return ( - model.startsWith("gpt-3") || - model.startsWith("gpt-4o") || - model.startsWith("gpt-4-") || - model === "gpt-4" - ); - } - private getCustomPrompt(): string | undefined { try { const raw = localStorage.getItem("customUnifiedPrompt"); @@ -1150,8 +1137,8 @@ class ReasoningService extends BaseReasoningService { } } - const useOldTokenParam = - isLocalProvider || provider === "groq" || this.isOlderOpenAiModel(model); + const apiConfig = getOpenAiApiConfig(model); + const useOldTokenParam = isLocalProvider || provider === "groq"; const requestBody: Record = { model, @@ -1160,12 +1147,15 @@ class ReasoningService extends BaseReasoningService { }; const maxTokens = config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS); - + if (useOldTokenParam) { requestBody.temperature = config.temperature ?? 0.3; requestBody.max_tokens = maxTokens; } else { - requestBody.max_completion_tokens = maxTokens; + requestBody[apiConfig.tokenParam] = maxTokens; + if (apiConfig.supportsTemperature) { + requestBody.temperature = config.temperature ?? 0.3; + } } logger.logReasoning("AGENT_STREAM_REQUEST", { From eab0a6107b540be829d20d62681e5c03b12ef08d Mon Sep 17 00:00:00 2001 From: Gabriel Stein Date: Sun, 15 Mar 2026 12:59:05 -0700 Subject: [PATCH 3/3] fix: send temperature on Responses API for models that support it gpt-4.1 models support temperature on both the Responses API and Chat Completions endpoints. Move the temperature check outside the if/else branch so it applies to both API paths. --- src/services/ReasoningService.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/services/ReasoningService.ts b/src/services/ReasoningService.ts index 987d1aae..f822d99e 100644 --- a/src/services/ReasoningService.ts +++ b/src/services/ReasoningService.ts @@ -558,9 +558,10 @@ class ReasoningService extends BaseReasoningService { } else { requestBody.messages = messages; requestBody[apiConfig.tokenParam] = maxTokens; - if (apiConfig.supportsTemperature) { - requestBody.temperature = config.temperature || 0.3; - } + } + + if (apiConfig.supportsTemperature) { + requestBody.temperature = config.temperature || 0.3; } const res = await fetch(endpoint, {