3 changes: 2 additions & 1 deletion src/locales/de/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Schnelle mehrsprachige Transkription"
   },
   "cloud": {
-    "openai_gpt_5_2": "Neuestes Flaggschiff-Modell für Reasoning",
+    "openai_gpt_5_4": "Frontier-Modell für komplexes Reasoning",
+    "openai_gpt_5_2": "Starkes Reasoning-Modell",
     "openai_gpt_5_mini": "Schnell und kosteneffizient",
     "openai_gpt_5_nano": "Ultraschnell, niedrige Latenz",
     "openai_gpt_4_1": "Starke Basis, 1M Kontext",
3 changes: 2 additions & 1 deletion src/locales/en/translation.json
@@ -1434,7 +1434,8 @@
     "mistral_voxtral_mini_latest": "Fast multilingual transcription"
   },
   "cloud": {
-    "openai_gpt_5_2": "Latest flagship reasoning model",
+    "openai_gpt_5_4": "Frontier model for complex reasoning",
+    "openai_gpt_5_2": "Strong reasoning model",
     "openai_gpt_5_mini": "Fast and cost-efficient",
     "openai_gpt_5_nano": "Ultra-fast, low latency",
     "openai_gpt_4_1": "Strong baseline, 1M context",
3 changes: 2 additions & 1 deletion src/locales/es/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Transcripción multilingüe rápida"
   },
   "cloud": {
-    "openai_gpt_5_2": "Modelo insignia de razonamiento más reciente",
+    "openai_gpt_5_4": "Modelo frontier para razonamiento complejo",
+    "openai_gpt_5_2": "Modelo fuerte de razonamiento",
     "openai_gpt_5_mini": "Rápido y económico",
     "openai_gpt_5_nano": "Ultrarrápido, baja latencia",
     "openai_gpt_4_1": "Base sólida, contexto de 1M",
3 changes: 2 additions & 1 deletion src/locales/fr/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Transcription multilingue rapide"
   },
   "cloud": {
-    "openai_gpt_5_2": "Dernier modèle phare pour le raisonnement",
+    "openai_gpt_5_4": "Modèle frontier pour le raisonnement complexe",
+    "openai_gpt_5_2": "Modèle de raisonnement performant",
     "openai_gpt_5_mini": "Rapide et économique",
     "openai_gpt_5_nano": "Ultra-rapide, faible latence",
     "openai_gpt_4_1": "Base solide, contexte 1M",
3 changes: 2 additions & 1 deletion src/locales/it/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Trascrizione multilingue veloce"
   },
   "cloud": {
-    "openai_gpt_5_2": "Ultimo modello di punta per il ragionamento",
+    "openai_gpt_5_4": "Modello frontier per ragionamento complesso",
+    "openai_gpt_5_2": "Modello di ragionamento potente",
     "openai_gpt_5_mini": "Veloce ed efficiente nei costi",
     "openai_gpt_5_nano": "Ultra-veloce, bassa latenza",
     "openai_gpt_4_1": "Base solida, contesto da 1M",
3 changes: 2 additions & 1 deletion src/locales/ja/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "高速多言語文字起こし"
   },
   "cloud": {
-    "openai_gpt_5_2": "最新のフラッグシップ推論モデル",
+    "openai_gpt_5_4": "複雑な推論のためのフロンティアモデル",
+    "openai_gpt_5_2": "強力な推論モデル",
     "openai_gpt_5_mini": "高速で費用対効果が高い",
     "openai_gpt_5_nano": "超高速、低レイテンシー",
     "openai_gpt_4_1": "強力なベースライン、100 万トークンコンテキスト",
3 changes: 2 additions & 1 deletion src/locales/pt/translation.json
@@ -1336,7 +1336,8 @@
     "mistral_voxtral_mini_latest": "Transcrição multilíngue rápida"
   },
   "cloud": {
-    "openai_gpt_5_2": "Modelo principal de raciocínio mais recente",
+    "openai_gpt_5_4": "Modelo frontier para raciocínio complexo",
+    "openai_gpt_5_2": "Modelo forte de raciocínio",
     "openai_gpt_5_mini": "Rápido e eficiente em custo",
     "openai_gpt_5_nano": "Ultrarápido, baixa latência",
     "openai_gpt_4_1": "Base sólida, contexto de 1M",
3 changes: 2 additions & 1 deletion src/locales/ru/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "Быстрая многоязычная транскрипция"
   },
   "cloud": {
-    "openai_gpt_5_2": "Новейшая флагманская модель с рассуждением",
+    "openai_gpt_5_4": "Фронтирная модель для сложного рассуждения",
+    "openai_gpt_5_2": "Сильная модель рассуждения",
     "openai_gpt_5_mini": "Быстрая и экономичная",
     "openai_gpt_5_nano": "Сверхбыстрая, низкая задержка",
     "openai_gpt_4_1": "Надёжная базовая модель, контекст 1M",
3 changes: 2 additions & 1 deletion src/locales/zh-CN/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "快速多语言转录"
   },
   "cloud": {
-    "openai_gpt_5_2": "最新旗舰推理模型",
+    "openai_gpt_5_4": "复杂推理前沿模型",
+    "openai_gpt_5_2": "强大的推理模型",
     "openai_gpt_5_mini": "快速且经济",
     "openai_gpt_5_nano": "超快速、低延迟",
     "openai_gpt_4_1": "稳定基准,1M 上下文",
3 changes: 2 additions & 1 deletion src/locales/zh-TW/translation.json
@@ -1364,7 +1364,8 @@
     "mistral_voxtral_mini_latest": "快速多語言轉錄"
   },
   "cloud": {
-    "openai_gpt_5_2": "最新旗艦推理模型",
+    "openai_gpt_5_4": "複雜推理前沿模型",
+    "openai_gpt_5_2": "強大的推理模型",
     "openai_gpt_5_mini": "快速且經濟實惠",
     "openai_gpt_5_nano": "超快速,低延遲",
     "openai_gpt_4_1": "穩健基礎,1M 上下文",
36 changes: 36 additions & 0 deletions src/models/ModelRegistry.ts
@@ -38,6 +38,8 @@ export interface CloudModelDefinition {
   description: string;
   descriptionKey?: string;
   disableThinking?: boolean;
+  tokenParam?: "max_tokens" | "max_completion_tokens";
+  supportsTemperature?: boolean;
 }
 
 export interface CloudProviderData {
@@ -327,6 +329,40 @@ export function getCloudModel(modelId: string): CloudModelDefinition | undefined
   return undefined;
 }
 
+export interface OpenAiApiConfig {
+  tokenParam: "max_tokens" | "max_completion_tokens";
+  supportsTemperature: boolean;
+}
+
+export function getOpenAiApiConfig(modelId: string): OpenAiApiConfig {
+  const model = getCloudModel(modelId);
+  if (model?.tokenParam) {
+    return {
+      tokenParam: model.tokenParam,
+      supportsTemperature: model.supportsTemperature ?? true,
+    };
+  }
+
+  // Fallback for models not in the registry (custom model IDs, etc.)
+  const isLegacy =
+    modelId.startsWith("gpt-3") ||
+    modelId.startsWith("gpt-4o") ||
+    modelId.startsWith("gpt-4-") ||
+    modelId === "gpt-4";
+
+  if (isLegacy) {
+    return { tokenParam: "max_tokens", supportsTemperature: true };
+  }
+
+  // gpt-4.1* supports temperature but uses max_completion_tokens
+  if (modelId.startsWith("gpt-4.1")) {
+    return { tokenParam: "max_completion_tokens", supportsTemperature: true };
+  }
+
+  // gpt-5* reasoning models: no temperature
+  return { tokenParam: "max_completion_tokens", supportsTemperature: false };
+}
+
 export function getParakeetModels(): ParakeetModelsMap {
   return modelData.parakeetModels;
 }
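For review convenience, a minimal usage sketch of the new helper; the dated and `-custom` model IDs below are hypothetical stand-ins for custom IDs that miss the registry and exercise the fallback branches:

```ts
import { getOpenAiApiConfig } from "../models/ModelRegistry";

// Registry hit: gpt-5.2 now carries explicit tokenParam/supportsTemperature.
getOpenAiApiConfig("gpt-5.2");
// => { tokenParam: "max_completion_tokens", supportsTemperature: false }

// Registry misses fall through to prefix matching:
getOpenAiApiConfig("gpt-4o-2024-08-06"); // legacy "gpt-4o" prefix
// => { tokenParam: "max_tokens", supportsTemperature: true }

getOpenAiApiConfig("gpt-4.1-2025-04-14"); // "gpt-4.1" prefix: new token param, temperature kept
// => { tokenParam: "max_completion_tokens", supportsTemperature: true }

getOpenAiApiConfig("gpt-5-custom"); // default branch: reasoning models, no temperature
// => { tokenParam: "max_completion_tokens", supportsTemperature: false }
```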
34 changes: 27 additions & 7 deletions src/models/modelRegistryData.json
@@ -166,41 +166,61 @@
       "id": "openai",
       "name": "OpenAI",
       "models": [
+        {
+          "id": "gpt-5.4",
+          "name": "GPT-5.4",
+          "description": "Frontier model for complex reasoning",
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_4",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": false
+        },
         {
           "id": "gpt-5.2",
           "name": "GPT-5.2",
-          "description": "Latest flagship reasoning model",
-          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2"
+          "description": "Strong reasoning model",
+          "descriptionKey": "models.descriptions.cloud.openai_gpt_5_2",
+          "tokenParam": "max_completion_tokens",
+          "supportsTemperature": false
         },
         {
          "id": "gpt-5-mini",
          "name": "GPT-5 Mini",
          "description": "Fast and cost-efficient",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_5_mini",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": false
        },
        {
          "id": "gpt-5-nano",
          "name": "GPT-5 Nano",
          "description": "Ultra-fast, low latency",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_5_nano",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": false
        },
        {
          "id": "gpt-4.1",
          "name": "GPT-4.1",
          "description": "Strong baseline, 1M context",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": true
        },
        {
          "id": "gpt-4.1-mini",
          "name": "GPT-4.1 Mini",
          "description": "Smaller GPT-4.1 model",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_mini",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": true
        },
        {
          "id": "gpt-4.1-nano",
          "name": "GPT-4.1 Nano",
          "description": "Lowest latency GPT-4.1",
-         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano"
+         "descriptionKey": "models.descriptions.cloud.openai_gpt_4_1_nano",
+         "tokenParam": "max_completion_tokens",
+         "supportsTemperature": true
        }
       ]
     },
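Each entry above must typecheck against the extended `CloudModelDefinition`; assuming the interface's fields above the diffed hunk include `id` and `name` (as the JSON implies), one parsed entry looks like this sketch:

```ts
import type { CloudModelDefinition } from "../models/ModelRegistry";

// The new gpt-5.4 registry entry as a typed value. Both new fields are
// optional, so entries that omit them remain valid and simply fall back
// to getOpenAiApiConfig's prefix-based defaults.
const gpt54: CloudModelDefinition = {
  id: "gpt-5.4",
  name: "GPT-5.4",
  description: "Frontier model for complex reasoning",
  descriptionKey: "models.descriptions.cloud.openai_gpt_5_4",
  tokenParam: "max_completion_tokens",
  supportsTemperature: false,
};
```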
43 changes: 35 additions & 8 deletions src/services/ReasoningService.ts
@@ -1,4 +1,4 @@
-import { getModelProvider, getCloudModel } from "../models/ModelRegistry";
+import { getModelProvider, getCloudModel, getOpenAiApiConfig } from "../models/ModelRegistry";
 import { BaseReasoningService, ReasoningConfig } from "./BaseReasoningService";
 import { SecureCache } from "../utils/SecureCache";
 import { withRetry, createApiRetryStrategy } from "../utils/retry";
@@ -508,8 +508,6 @@ class ReasoningService extends BaseReasoningService {
       { role: "user", content: userPrompt },
     ];
 
-    const isOlderModel = model && (model.startsWith("gpt-4") || model.startsWith("gpt-3"));
-
     const openAiBase = this.getConfiguredOpenAIBase();
     const endpointCandidates = this.getOpenAIEndpointCandidates(openAiBase);
     const isCustomEndpoint = openAiBase !== API_ENDPOINTS.OPENAI_BASE;
@@ -538,16 +536,32 @@
       const controller = new AbortController();
       const timeoutId = setTimeout(() => controller.abort(), 30000);
       try {
+        const maxTokens =
+          config.maxTokens ||
+          Math.max(
+            4096,
+            this.calculateMaxTokens(
+              text.length,
+              TOKEN_LIMITS.MIN_TOKENS,
+              TOKEN_LIMITS.MAX_TOKENS,
+              TOKEN_LIMITS.TOKEN_MULTIPLIER
+            )
+          );
+
+        const apiConfig = getOpenAiApiConfig(model);
         const requestBody: any = { model };
 
         if (type === "responses") {
           requestBody.input = messages;
           requestBody.store = false;
+          requestBody.max_output_tokens = maxTokens;
         } else {
           requestBody.messages = messages;
-          if (isOlderModel) {
-            requestBody.temperature = config.temperature || 0.3;
-          }
+          requestBody[apiConfig.tokenParam] = maxTokens;
         }
 
+        if (apiConfig.supportsTemperature) {
+          requestBody.temperature = config.temperature || 0.3;
+        }
+
         const res = await fetch(endpoint, {
@@ -1124,14 +1138,27 @@ class ReasoningService extends BaseReasoningService {
       }
     }
 
+    const apiConfig = getOpenAiApiConfig(model);
+    const useOldTokenParam = isLocalProvider || provider === "groq";
+
     const requestBody: Record<string, unknown> = {
       model,
       messages,
       stream: true,
-      temperature: config.temperature ?? 0.3,
-      max_tokens: config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS),
     };
 
+    const maxTokens = config.maxTokens || Math.max(4096, TOKEN_LIMITS.MAX_TOKENS);
+
+    if (useOldTokenParam) {
+      requestBody.temperature = config.temperature ?? 0.3;
+      requestBody.max_tokens = maxTokens;
+    } else {
+      requestBody[apiConfig.tokenParam] = maxTokens;
+      if (apiConfig.supportsTemperature) {
+        requestBody.temperature = config.temperature ?? 0.3;
+      }
+    }
+
     logger.logReasoning("AGENT_STREAM_REQUEST", {
       endpoint,
       model,
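To make the streaming change concrete, a sketch of the two request-body shapes the new branch produces, assuming a GPT-5-family model on OpenAI versus a Groq/local provider (the Groq model ID and token counts are illustrative):

```ts
const messages = [{ role: "user" as const, content: "Summarize this transcript." }];

// OpenAI gpt-5.2 (supportsTemperature: false): temperature is omitted
// entirely and the cap is sent as max_completion_tokens.
const gpt5Body = {
  model: "gpt-5.2",
  messages,
  stream: true,
  max_completion_tokens: 4096,
};

// Local provider or Groq (useOldTokenParam === true): the pre-change
// behavior is preserved, so temperature and max_tokens are always set.
const groqBody = {
  model: "llama-3.3-70b-versatile", // illustrative
  messages,
  stream: true,
  temperature: 0.3,
  max_tokens: 4096,
};
```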