-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Show cache read and write prices for OpenRouter inference providers #7176
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change
---|---|---
|
@@ -58,6 +58,7 @@ export type OpenRouterModel = z.infer<typeof openRouterModelSchema> | |||||
|
||||||
export const openRouterModelEndpointSchema = modelRouterBaseModelSchema.extend({ | ||||||
provider_name: z.string(), | ||||||
tag: z.string().optional(), | ||||||
}) | ||||||
|
||||||
export type OpenRouterModelEndpoint = z.infer<typeof openRouterModelEndpointSchema> | ||||||
|
@@ -149,7 +150,7 @@ export async function getOpenRouterModelEndpoints( | |||||
const { id, architecture, endpoints } = data | ||||||
|
||||||
for (const endpoint of endpoints) { | ||||||
models[endpoint.provider_name] = parseOpenRouterModel({ | ||||||
models[endpoint.tag ?? endpoint.provider_name] = parseOpenRouterModel({ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this fallback to `provider_name` intentional for endpoints that do not provide a `tag`? |
||||||
id, | ||||||
model: endpoint, | ||||||
modality: architecture?.modality, | ||||||
|
@@ -188,7 +189,7 @@ export const parseOpenRouterModel = ({ | |||||
|
||||||
const cacheReadsPrice = model.pricing?.input_cache_read ? parseApiPrice(model.pricing?.input_cache_read) : undefined | ||||||
|
||||||
const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined" | ||||||
const supportsPromptCache = typeof cacheReadsPrice !== "undefined" // some models support caching but don't charge a cacheWritesPrice, e.g. GPT-5 | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we make this comment more descriptive? Consider:
Suggested change
|
||||||
|
||||||
const modelInfo: ModelInfo = { | ||||||
maxTokens: maxTokens || Math.ceil(model.context_length * 0.2), | ||||||
|
Original file line number | Diff line number | Diff line change
---|---|---
|
@@ -22,12 +22,15 @@ const openRouterEndpointsSchema = z.object({ | |||||
endpoints: z.array( | ||||||
z.object({ | ||||||
name: z.string(), | ||||||
tag: z.string().optional(), | ||||||
context_length: z.number(), | ||||||
max_completion_tokens: z.number().nullish(), | ||||||
pricing: z | ||||||
.object({ | ||||||
prompt: z.union([z.string(), z.number()]).optional(), | ||||||
completion: z.union([z.string(), z.number()]).optional(), | ||||||
input_cache_read: z.union([z.string(), z.number()]).optional(), | ||||||
input_cache_write: z.union([z.string(), z.number()]).optional(), | ||||||
}) | ||||||
.optional(), | ||||||
}), | ||||||
|
@@ -51,49 +54,28 @@ async function getOpenRouterProvidersForModel(modelId: string) { | |||||
return models | ||||||
} | ||||||
|
||||||
const { id, description, architecture, endpoints } = result.data.data | ||||||
const { description, architecture, endpoints } = result.data.data | ||||||
|
||||||
for (const endpoint of endpoints) { | ||||||
const providerName = endpoint.name.split("|")[0].trim() | ||||||
const providerName = endpoint.tag ?? endpoint.name | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For consistency with the backend implementation, consider adding a comment explaining the fallback logic:
Suggested change
|
||||||
const inputPrice = parseApiPrice(endpoint.pricing?.prompt) | ||||||
const outputPrice = parseApiPrice(endpoint.pricing?.completion) | ||||||
const cacheReadsPrice = parseApiPrice(endpoint.pricing?.input_cache_read) | ||||||
const cacheWritesPrice = parseApiPrice(endpoint.pricing?.input_cache_write) | ||||||
|
||||||
const modelInfo: OpenRouterModelProvider = { | ||||||
maxTokens: endpoint.max_completion_tokens || endpoint.context_length, | ||||||
contextWindow: endpoint.context_length, | ||||||
supportsImages: architecture?.modality?.includes("image"), | ||||||
supportsPromptCache: false, | ||||||
supportsPromptCache: typeof cacheReadsPrice !== "undefined", | ||||||
cacheReadsPrice, | ||||||
cacheWritesPrice, | ||||||
inputPrice, | ||||||
outputPrice, | ||||||
description, | ||||||
label: providerName, | ||||||
} | ||||||
|
||||||
// TODO: This is wrong. We need to fetch the model info from | ||||||
// OpenRouter instead of hardcoding it here. The endpoints payload | ||||||
// doesn't include this unfortunately, so we need to get it from the | ||||||
// main models endpoint. | ||||||
switch (true) { | ||||||
case modelId.startsWith("anthropic/claude-3.7-sonnet"): | ||||||
modelInfo.supportsComputerUse = true | ||||||
modelInfo.supportsPromptCache = true | ||||||
modelInfo.cacheWritesPrice = 3.75 | ||||||
modelInfo.cacheReadsPrice = 0.3 | ||||||
modelInfo.maxTokens = id === "anthropic/claude-3.7-sonnet:thinking" ? 64_000 : 8192 | ||||||
break | ||||||
case modelId.startsWith("anthropic/claude-3.5-sonnet-20240620"): | ||||||
modelInfo.supportsPromptCache = true | ||||||
modelInfo.cacheWritesPrice = 3.75 | ||||||
modelInfo.cacheReadsPrice = 0.3 | ||||||
modelInfo.maxTokens = 8192 | ||||||
break | ||||||
default: | ||||||
modelInfo.supportsPromptCache = true | ||||||
modelInfo.cacheWritesPrice = 0.3 | ||||||
modelInfo.cacheReadsPrice = 0.03 | ||||||
break | ||||||
} | ||||||
|
||||||
models[providerName] = modelInfo | ||||||
} | ||||||
} catch (error) { | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be helpful to expand this comment to explain the cache_control limitation more clearly? Something like: