Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/api/providers/fetchers/__tests__/openrouter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ describe("OpenRouter API", () => {
const models = await getOpenRouterModels()

const openRouterSupportedCaching = Object.entries(models)
.filter(([id, _]) => id.startsWith("anthropic/claude") || id.startsWith("google/gemini")) // only these support cache_control breakpoints (https://openrouter.ai/docs/features/prompt-caching)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it help to expand this comment so it states explicitly which model families support cache_control breakpoints, rather than just "only these"? Something like:

Suggested change
.filter(([id, _]) => id.startsWith("anthropic/claude") || id.startsWith("google/gemini")) // only these support cache_control breakpoints (https://openrouter.ai/docs/features/prompt-caching)
.filter(([id, _]) => id.startsWith("anthropic/claude") || id.startsWith("google/gemini")) // Only Anthropic Claude and Google Gemini models support cache_control breakpoints for explicit cache management (https://openrouter.ai/docs/features/prompt-caching)

.filter(([_, model]) => model.supportsPromptCache)
.map(([id, _]) => id)

Expand Down Expand Up @@ -229,7 +230,7 @@ describe("OpenRouter API", () => {
const endpoints = await getOpenRouterModelEndpoints("google/gemini-2.5-pro-preview")

expect(endpoints).toEqual({
Google: {
"google-vertex": {
maxTokens: 65535,
contextWindow: 1048576,
supportsImages: true,
Expand All @@ -243,7 +244,7 @@ describe("OpenRouter API", () => {
supportsReasoningEffort: undefined,
supportedParameters: undefined,
},
"Google AI Studio": {
"google-ai-studio": {
maxTokens: 65536,
contextWindow: 1048576,
supportsImages: true,
Expand Down
5 changes: 3 additions & 2 deletions src/api/providers/fetchers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export type OpenRouterModel = z.infer<typeof openRouterModelSchema>

/**
 * Zod schema for one endpoint entry: every field of
 * `modelRouterBaseModelSchema` plus the fields that identify the endpoint.
 */
export const openRouterModelEndpointSchema = modelRouterBaseModelSchema.extend({
// Required string naming the provider that serves this endpoint.
provider_name: z.string(),
// Optional string tag for the endpoint; callers must handle it being undefined.
tag: z.string().optional(),
})

export type OpenRouterModelEndpoint = z.infer<typeof openRouterModelEndpointSchema>
Expand Down Expand Up @@ -149,7 +150,7 @@ export async function getOpenRouterModelEndpoints(
const { id, architecture, endpoints } = data

for (const endpoint of endpoints) {
models[endpoint.provider_name] = parseOpenRouterModel({
models[endpoint.tag ?? endpoint.provider_name] = parseOpenRouterModel({
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

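Is the fallback to endpoint.provider_name intentional for backward compatibility? If the OpenRouter API does not always return a tag, falling back is a sensible defensive default. Note that the result is used as the key in `models`, so two endpoints that resolve to the same key would overwrite each other.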

id,
model: endpoint,
modality: architecture?.modality,
Expand Down Expand Up @@ -188,7 +189,7 @@ export const parseOpenRouterModel = ({

const cacheReadsPrice = model.pricing?.input_cache_read ? parseApiPrice(model.pricing?.input_cache_read) : undefined

const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"
const supportsPromptCache = typeof cacheReadsPrice !== "undefined" // some models support caching but don't charge a cacheWritesPrice, e.g. GPT-5
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make this comment more descriptive, so it explains why cache support is now inferred from the cache-read price alone? Consider:

Suggested change
const supportsPromptCache = typeof cacheReadsPrice !== "undefined" // some models support caching but don't charge a cacheWritesPrice, e.g. GPT-5
const supportsPromptCache = typeof cacheReadsPrice !== "undefined" // OpenRouter reports cache support based on read price only, as some models support caching without charging for cache writes (e.g. GPT-5)


const modelInfo: ModelInfo = {
maxTokens: maxTokens || Math.ceil(model.context_length * 0.2),
Expand Down
38 changes: 10 additions & 28 deletions webview-ui/src/components/ui/hooks/useOpenRouterModelProviders.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@ const openRouterEndpointsSchema = z.object({
endpoints: z.array(
z.object({
name: z.string(),
tag: z.string().optional(),
context_length: z.number(),
max_completion_tokens: z.number().nullish(),
pricing: z
.object({
prompt: z.union([z.string(), z.number()]).optional(),
completion: z.union([z.string(), z.number()]).optional(),
input_cache_read: z.union([z.string(), z.number()]).optional(),
input_cache_write: z.union([z.string(), z.number()]).optional(),
})
.optional(),
}),
Expand All @@ -51,49 +54,28 @@ async function getOpenRouterProvidersForModel(modelId: string) {
return models
}

const { id, description, architecture, endpoints } = result.data.data
const { description, architecture, endpoints } = result.data.data

for (const endpoint of endpoints) {
const providerName = endpoint.name.split("|")[0].trim()
const providerName = endpoint.tag ?? endpoint.name
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

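For consistency with the backend implementation (getOpenRouterModelEndpoints in src/api/providers/fetchers/openrouter.ts, which keys on endpoint.tag ?? endpoint.provider_name), consider adding a comment explaining the fallback logic: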

Suggested change
const providerName = endpoint.tag ?? endpoint.name
const providerName = endpoint.tag ?? endpoint.name // Use unique tag when available, fallback to name for backward compatibility

const inputPrice = parseApiPrice(endpoint.pricing?.prompt)
const outputPrice = parseApiPrice(endpoint.pricing?.completion)
const cacheReadsPrice = parseApiPrice(endpoint.pricing?.input_cache_read)
const cacheWritesPrice = parseApiPrice(endpoint.pricing?.input_cache_write)

const modelInfo: OpenRouterModelProvider = {
maxTokens: endpoint.max_completion_tokens || endpoint.context_length,
contextWindow: endpoint.context_length,
supportsImages: architecture?.modality?.includes("image"),
supportsPromptCache: false,
supportsPromptCache: typeof cacheReadsPrice !== "undefined",
cacheReadsPrice,
cacheWritesPrice,
inputPrice,
outputPrice,
description,
label: providerName,
}

// TODO: This is wrong. We need to fetch the model info from
// OpenRouter instead of hardcoding it here. The endpoints payload
// doesn't include this unfortunately, so we need to get it from the
// main models endpoint.
switch (true) {
case modelId.startsWith("anthropic/claude-3.7-sonnet"):
modelInfo.supportsComputerUse = true
modelInfo.supportsPromptCache = true
modelInfo.cacheWritesPrice = 3.75
modelInfo.cacheReadsPrice = 0.3
modelInfo.maxTokens = id === "anthropic/claude-3.7-sonnet:thinking" ? 64_000 : 8192
break
case modelId.startsWith("anthropic/claude-3.5-sonnet-20240620"):
modelInfo.supportsPromptCache = true
modelInfo.cacheWritesPrice = 3.75
modelInfo.cacheReadsPrice = 0.3
modelInfo.maxTokens = 8192
break
default:
modelInfo.supportsPromptCache = true
modelInfo.cacheWritesPrice = 0.3
modelInfo.cacheReadsPrice = 0.03
break
}

models[providerName] = modelInfo
}
} catch (error) {
Expand Down
Loading