diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 46331c7cff7b..c5c8fdb71d73 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -15394,8 +15394,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7.2e-08, - "output_cost_per_token": 7.2e-08, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 9e-08, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -15414,8 +15414,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 1.2e-07, + "output_cost_per_token": 3e-07, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": false @@ -15534,9 +15534,28 @@ "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "input_cost_per_token": 3e-07, + "input_cost_per_token": 2.9e-07, "output_cost_per_token": 1.2e-06, - "cache_read_input_token_cost": 2.4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 1.4e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 1.4e-06, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -15647,8 +15666,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 6e-07, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": false @@ -15657,8 +15676,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 7.5e-08, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 2.7e-07, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -15703,8 +15722,7 @@ "cache_read_input_token_cost": 2.16e-07, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true, - "supports_reasoning": true + "supports_tool_choice": true }, "deepinfra/google/gemini-2.0-flash-001": { "max_tokens": 1000000, @@ -15966,6 +15984,17 @@ "mode": "chat", "supports_tool_choice": true }, + "deepinfra/moonshotai/Kimi-K2-Instruct-0905": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 2e-06, + "cache_read_input_token_cost": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, "deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { "max_tokens": 131072, "max_input_tokens": 131072, @@ -15976,6 +16005,26 @@ "mode": "chat", "supports_tool_choice": true }, + "deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, "deepinfra/openai/gpt-oss-120b": { "max_tokens": 131072, "max_input_tokens": 131072,