diff --git a/src/locales/de/translation.json b/src/locales/de/translation.json index 33f6acbb..cbf996ce 100644 --- a/src/locales/de/translation.json +++ b/src/locales/de/translation.json @@ -1364,7 +1364,8 @@ "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription" }, "cloud": { - "openai_gpt_5_2": "Neuestes Flaggschiff-Modell für Reasoning", + "openai_gpt_5_4": "Frontier-Modell für komplexes Reasoning", + "openai_gpt_5_2": "Starkes Reasoning-Modell", "openai_gpt_5_mini": "Schnell und kosteneffizient", "openai_gpt_5_nano": "Ultraschnell, niedrige Latenz", "openai_gpt_4_1": "Starke Basis, 1M Kontext", diff --git a/src/locales/en/translation.json b/src/locales/en/translation.json index bf54624a..7cf4e76f 100644 --- a/src/locales/en/translation.json +++ b/src/locales/en/translation.json @@ -1434,7 +1434,8 @@ "mistral_voxtral_mini_latest": "Fast multilingual transcription" }, "cloud": { - "openai_gpt_5_2": "Latest flagship reasoning model", + "openai_gpt_5_4": "Frontier model for complex reasoning", + "openai_gpt_5_2": "Strong reasoning model", "openai_gpt_5_mini": "Fast and cost-efficient", "openai_gpt_5_nano": "Ultra-fast, low latency", "openai_gpt_4_1": "Strong baseline, 1M context", diff --git a/src/locales/es/translation.json b/src/locales/es/translation.json index 5a6b5d93..a8581863 100644 --- a/src/locales/es/translation.json +++ b/src/locales/es/translation.json @@ -1364,7 +1364,8 @@ "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida" }, "cloud": { - "openai_gpt_5_2": "Modelo insignia de razonamiento más reciente", + "openai_gpt_5_4": "Modelo frontier para razonamiento complejo", + "openai_gpt_5_2": "Modelo fuerte de razonamiento", "openai_gpt_5_mini": "Rápido y económico", "openai_gpt_5_nano": "Ultrarrápido, baja latencia", "openai_gpt_4_1": "Base sólida, contexto de 1M", diff --git a/src/locales/fr/translation.json b/src/locales/fr/translation.json index 5117050d..28a28eed 100644 --- a/src/locales/fr/translation.json +++ 
b/src/locales/fr/translation.json @@ -1364,7 +1364,8 @@ "mistral_voxtral_mini_latest": "Transcription multilingue rapide" }, "cloud": { - "openai_gpt_5_2": "Dernier modèle phare pour le raisonnement", + "openai_gpt_5_4": "Modèle frontier pour le raisonnement complexe", + "openai_gpt_5_2": "Modèle de raisonnement performant", "openai_gpt_5_mini": "Rapide et économique", "openai_gpt_5_nano": "Ultra-rapide, faible latence", "openai_gpt_4_1": "Base solide, contexte 1M", diff --git a/src/locales/it/translation.json b/src/locales/it/translation.json index 6a99089a..866b89a2 100644 --- a/src/locales/it/translation.json +++ b/src/locales/it/translation.json @@ -1364,7 +1364,8 @@ "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce" }, "cloud": { - "openai_gpt_5_2": "Ultimo modello di punta per il ragionamento", + "openai_gpt_5_4": "Modello frontier per ragionamento complesso", + "openai_gpt_5_2": "Modello di ragionamento potente", "openai_gpt_5_mini": "Veloce ed efficiente nei costi", "openai_gpt_5_nano": "Ultra-veloce, bassa latenza", "openai_gpt_4_1": "Base solida, contesto da 1M", diff --git a/src/locales/ja/translation.json b/src/locales/ja/translation.json index 2877a1f0..d30cd0a5 100644 --- a/src/locales/ja/translation.json +++ b/src/locales/ja/translation.json @@ -1364,7 +1364,8 @@ "mistral_voxtral_mini_latest": "高速多言語文字起こし" }, "cloud": { - "openai_gpt_5_2": "最新のフラッグシップ推論モデル", + "openai_gpt_5_4": "複雑な推論のためのフロンティアモデル", + "openai_gpt_5_2": "強力な推論モデル", "openai_gpt_5_mini": "高速で費用対効果が高い", "openai_gpt_5_nano": "超高速、低レイテンシー", "openai_gpt_4_1": "強力なベースライン、100 万トークンコンテキスト", diff --git a/src/locales/pt/translation.json b/src/locales/pt/translation.json index e0c32cf5..1b3e892f 100644 --- a/src/locales/pt/translation.json +++ b/src/locales/pt/translation.json @@ -1336,7 +1336,8 @@ "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida" }, "cloud": { - "openai_gpt_5_2": "Modelo principal de raciocínio mais recente", + "openai_gpt_5_4": "Modelo frontier para 
raciocínio complexo", + "openai_gpt_5_2": "Modelo forte de raciocínio", "openai_gpt_5_mini": "Rápido e eficiente em custo", "openai_gpt_5_nano": "Ultrarápido, baixa latência", "openai_gpt_4_1": "Base sólida, contexto de 1M", diff --git a/src/locales/ru/translation.json b/src/locales/ru/translation.json index 1873bd85..7cf46683 100644 --- a/src/locales/ru/translation.json +++ b/src/locales/ru/translation.json @@ -1364,7 +1364,8 @@ "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция" }, "cloud": { - "openai_gpt_5_2": "Новейшая флагманская модель с рассуждением", + "openai_gpt_5_4": "Фронтирная модель для сложного рассуждения", + "openai_gpt_5_2": "Сильная модель рассуждения", "openai_gpt_5_mini": "Быстрая и экономичная", "openai_gpt_5_nano": "Сверхбыстрая, низкая задержка", "openai_gpt_4_1": "Надёжная базовая модель, контекст 1M", diff --git a/src/locales/zh-CN/translation.json b/src/locales/zh-CN/translation.json index a14b7a03..48ab5521 100644 --- a/src/locales/zh-CN/translation.json +++ b/src/locales/zh-CN/translation.json @@ -1364,7 +1364,8 @@ "mistral_voxtral_mini_latest": "快速多语言转录" }, "cloud": { - "openai_gpt_5_2": "最新旗舰推理模型", + "openai_gpt_5_4": "复杂推理前沿模型", + "openai_gpt_5_2": "强大的推理模型", "openai_gpt_5_mini": "快速且经济", "openai_gpt_5_nano": "超快速、低延迟", "openai_gpt_4_1": "稳定基准,1M 上下文", diff --git a/src/locales/zh-TW/translation.json b/src/locales/zh-TW/translation.json index f7a61b58..e1d0c6bc 100644 --- a/src/locales/zh-TW/translation.json +++ b/src/locales/zh-TW/translation.json @@ -1364,7 +1364,8 @@ "mistral_voxtral_mini_latest": "快速多語言轉錄" }, "cloud": { - "openai_gpt_5_2": "最新旗艦推理模型", + "openai_gpt_5_4": "複雜推理前沿模型", + "openai_gpt_5_2": "強大的推理模型", "openai_gpt_5_mini": "快速且經濟實惠", "openai_gpt_5_nano": "超快速,低延遲", "openai_gpt_4_1": "穩健基礎,1M 上下文", diff --git a/src/models/ModelRegistry.ts b/src/models/ModelRegistry.ts index 7cd5b8a9..6b7ca7c0 100644 --- a/src/models/ModelRegistry.ts +++ b/src/models/ModelRegistry.ts @@ -38,6 +38,8 @@ export interface 
CloudModelDefinition { description: string; descriptionKey?: string; disableThinking?: boolean; + tokenParam?: "max_tokens" | "max_completion_tokens"; + supportsTemperature?: boolean; } export interface CloudProviderData { @@ -327,6 +329,40 @@ export function getCloudModel(modelId: string): CloudModelDefinition | undefined return undefined; } +export interface OpenAiApiConfig { + tokenParam: "max_tokens" | "max_completion_tokens"; + supportsTemperature: boolean; +} + +export function getOpenAiApiConfig(modelId: string): OpenAiApiConfig { + const model = getCloudModel(modelId); + if (model?.tokenParam) { + return { + tokenParam: model.tokenParam, + supportsTemperature: model.supportsTemperature ?? true, + }; + } + + // Fallback for models not in the registry (custom model IDs, etc.) + const isLegacy = + modelId.startsWith("gpt-3") || + modelId.startsWith("gpt-4o") || + modelId.startsWith("gpt-4-") || + modelId === "gpt-4"; + + if (isLegacy) { + return { tokenParam: "max_tokens", supportsTemperature: true }; + } + + // gpt-4.1* supports temperature but uses max_completion_tokens + if (modelId.startsWith("gpt-4.1")) { + return { tokenParam: "max_completion_tokens", supportsTemperature: true }; + } + + // gpt-5* reasoning models: no temperature + return { tokenParam: "max_completion_tokens", supportsTemperature: false }; +} + export function getParakeetModels(): ParakeetModelsMap { return modelData.parakeetModels; } diff --git a/src/models/modelRegistryData.json b/src/models/modelRegistryData.json index 0d09cf86..5cb6526d 100644 --- a/src/models/modelRegistryData.json +++ b/src/models/modelRegistryData.json @@ -166,41 +166,61 @@ "id": "openai", "name": "OpenAI", "models": [ + { + "id": "gpt-5.4", + "name": "GPT-5.4", + "description": "Frontier model for complex reasoning", + "descriptionKey": "models.descriptions.cloud.openai_gpt_5_4", + "tokenParam": "max_completion_tokens", + "supportsTemperature": false + }, { "id": "gpt-5.2", "name": "GPT-5.2", - "description": 
"Latest flagship reasoning model", - "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2" + "description": "Strong reasoning model", + "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2", + "tokenParam": "max_completion_tokens", + "supportsTemperature": false }, { "id": "gpt-5-mini", "name": "GPT-5 Mini", "description": "Fast and cost-efficient", - "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini" + "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini", + "tokenParam": "max_completion_tokens", + "supportsTemperature": false }, { "id": "gpt-5-nano", "name": "GPT-5 Nano", "description": "Ultra-fast, low latency", - "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano" + "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano", + "tokenParam": "max_completion_tokens", + "supportsTemperature": false }, { "id": "gpt-4.1", "name": "GPT-4.1", "description": "Strong baseline, 1M context", - "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1" + "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1", + "tokenParam": "max_completion_tokens", + "supportsTemperature": true }, { "id": "gpt-4.1-mini", "name": "GPT-4.1 Mini", "description": "Smaller GPT-4.1 model", - "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini" + "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini", + "tokenParam": "max_completion_tokens", + "supportsTemperature": true }, { "id": "gpt-4.1-nano", "name": "GPT-4.1 Nano", "description": "Lowest latency GPT-4.1", - "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano" + "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano", + "tokenParam": "max_completion_tokens", + "supportsTemperature": true } ] }, diff --git a/src/services/ReasoningService.ts b/src/services/ReasoningService.ts index f1eb2650..f822d99e 100644 --- a/src/services/ReasoningService.ts +++ b/src/services/ReasoningService.ts @@ -1,4 +1,4 @@ -import { getModelProvider, 
getCloudModel } from "../models/ModelRegistry"; +import { getModelProvider, getCloudModel, getOpenAiApiConfig } from "../models/ModelRegistry"; import { BaseReasoningService, ReasoningConfig } from "./BaseReasoningService"; import { SecureCache } from "../utils/SecureCache"; import { withRetry, createApiRetryStrategy } from "../utils/retry"; @@ -508,8 +508,6 @@ class ReasoningService extends BaseReasoningService { { role: "user", content: userPrompt }, ]; - const isOlderModel = model && (model.startsWith("gpt-4") || model.startsWith("gpt-3")); - const openAiBase = this.getConfiguredOpenAIBase(); const endpointCandidates = this.getOpenAIEndpointCandidates(openAiBase); const isCustomEndpoint = openAiBase !== API_ENDPOINTS.OPENAI_BASE; @@ -538,16 +536,32 @@ class ReasoningService extends BaseReasoningService { const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 30000); try { + const maxTokens = + config.maxTokens || + Math.max( + 4096, + this.calculateMaxTokens( + text.length, + TOKEN_LIMITS.MIN_TOKENS, + TOKEN_LIMITS.MAX_TOKENS, + TOKEN_LIMITS.TOKEN_MULTIPLIER + ) + ); + + const apiConfig = getOpenAiApiConfig(model); const requestBody: any = { model }; if (type === "responses") { requestBody.input = messages; requestBody.store = false; + requestBody.max_output_tokens = maxTokens; } else { requestBody.messages = messages; - if (isOlderModel) { - requestBody.temperature = config.temperature || 0.3; - } + requestBody[apiConfig.tokenParam] = maxTokens; + } + + if (apiConfig.supportsTemperature) { + requestBody.temperature = config.temperature || 0.3; } const res = await fetch(endpoint, { @@ -1124,14 +1138,27 @@ class ReasoningService extends BaseReasoningService { } } + const apiConfig = getOpenAiApiConfig(model); + const useOldTokenParam = isLocalProvider || provider === "groq"; + const requestBody: Record<string, unknown> = { model, messages, stream: true, - temperature: config.temperature ??
0.3, - max_tokens: config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS), }; + const maxTokens = config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS); + + if (useOldTokenParam) { + requestBody.temperature = config.temperature ?? 0.3; + requestBody.max_tokens = maxTokens; + } else { + requestBody[apiConfig.tokenParam] = maxTokens; + if (apiConfig.supportsTemperature) { + requestBody.temperature = config.temperature ?? 0.3; + } + } + logger.logReasoning("AGENT_STREAM_REQUEST", { endpoint, model,