diff --git a/docs/models.md b/docs/models.md index b383c574ec..88a3011dc4 100644 --- a/docs/models.md +++ b/docs/models.md @@ -47,6 +47,7 @@ GPT-5 family of models: - `openai:gpt-5.1` - `openai:gpt-5-pro` - `openai:gpt-5.1-codex` +- `openai:gpt-5.1-codex-max` — supports the XHIGH (extra high) thinking level; aliases: `gpt-5.1-codex-max`, `codex-max` - `openai:gpt-5.1-codex-mini` #### Google (Cloud) diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx index 235bacd253..3bf43dd03e 100644 --- a/src/browser/components/ChatInput/index.tsx +++ b/src/browser/components/ChatInput/index.tsx @@ -515,6 +515,7 @@ export const ChatInput: React.FC = (props) => { low: "Low — adds light reasoning", medium: "Medium — balanced reasoning", high: "High — maximum reasoning depth", + xhigh: "Extra High — extended deep thinking", }; setToast({ diff --git a/src/browser/components/ThinkingSlider.tsx b/src/browser/components/ThinkingSlider.tsx index ad2f4b50cd..d05124c097 100644 --- a/src/browser/components/ThinkingSlider.tsx +++ b/src/browser/components/ThinkingSlider.tsx @@ -123,10 +123,14 @@ export const ThinkingSliderComponent: React.FC = ({ modelS const sliderValue = currentIndex === -1 ? 0 : currentIndex; const maxSteps = allowed.length - 1; - // For styling, we still want to map to the "global" intensity 0-3 - // to keep colors consistent (e.g. "high" is always purple, even if it's step 1 of 2) - const globalLevelIndex = ["off", "low", "medium", "high"].indexOf(thinkingLevel); - const visualValue = globalLevelIndex === -1 ? 
0 : globalLevelIndex; + // Map levels to visual intensity indices (0-3) so colors/glow stay consistent + // Levels outside the base 4 (e.g., xhigh) map to the strongest intensity + const baseVisualOrder: ThinkingLevel[] = ["off", "low", "medium", "high"]; + const visualValue = (() => { + const idx = baseVisualOrder.indexOf(thinkingLevel); + if (idx >= 0) return idx; + return baseVisualOrder.length - 1; // clamp extras (e.g., xhigh) to strongest glow + })(); const sliderStyles = getSliderStyles(visualValue, isHovering); const textStyle = getTextStyle(visualValue); diff --git a/src/browser/utils/commands/sources.ts b/src/browser/utils/commands/sources.ts index 819bb7a8ad..01791e58b6 100644 --- a/src/browser/utils/commands/sources.ts +++ b/src/browser/utils/commands/sources.ts @@ -50,7 +50,7 @@ export interface BuildSourcesParams { onOpenSettings?: (section?: string) => void; } -const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high"]; +const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high", "xhigh"]; /** * Command palette section names @@ -431,6 +431,7 @@ export function buildCoreSources(p: BuildSourcesParams): Array<() => CommandActi low: "Low — add a bit of reasoning", medium: "Medium — balanced reasoning", high: "High — maximum reasoning depth", + xhigh: "Extra High — extended deep thinking", }; const currentLevel = p.getThinkingLevel(workspaceId); diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts index 86bc4a9030..6ec91aeeda 100644 --- a/src/browser/utils/thinking/policy.test.ts +++ b/src/browser/utils/thinking/policy.test.ts @@ -2,6 +2,56 @@ import { describe, expect, test } from "bun:test"; import { getThinkingPolicyForModel, enforceThinkingPolicy } from "./policy"; describe("getThinkingPolicyForModel", () => { + test("returns 5 levels including xhigh for gpt-5.1-codex-max", () => { + expect(getThinkingPolicyForModel("openai:gpt-5.1-codex-max")).toEqual([ + "off", + "low", + 
"medium", + "high", + "xhigh", + ]); + }); + + test("returns 5 levels for gpt-5.1-codex-max with version suffix", () => { + expect(getThinkingPolicyForModel("openai:gpt-5.1-codex-max-2025-12-01")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + + test("returns 5 levels for bare gpt-5.1-codex-max without prefix", () => { + expect(getThinkingPolicyForModel("gpt-5.1-codex-max")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + + test("returns 5 levels for codex-max alias", () => { + expect(getThinkingPolicyForModel("codex-max")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + + test("returns 5 levels for gpt-5.1-codex-max with whitespace after colon", () => { + expect(getThinkingPolicyForModel("openai: gpt-5.1-codex-max")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + test("returns single HIGH for gpt-5-pro base model", () => { expect(getThinkingPolicyForModel("openai:gpt-5-pro")).toEqual(["high"]); }); @@ -111,6 +161,32 @@ describe("enforceThinkingPolicy", () => { expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off"); }); }); + + describe("GPT-5.1-Codex-Max (5 levels including xhigh)", () => { + test("allows all 5 levels including xhigh", () => { + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "off")).toBe("off"); + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "low")).toBe("low"); + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "medium")).toBe("medium"); + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "high")).toBe("high"); + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "xhigh")).toBe("xhigh"); + }); + + test("allows xhigh for versioned model", () => { + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max-2025-12-01", "xhigh")).toBe("xhigh"); + }); + }); + + describe("xhigh fallback for non-codex-max models", () => { + test("falls back to medium when xhigh requested on 
standard model", () => { + // Standard models don't support xhigh, so fall back to medium (preferred fallback) + expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "xhigh")).toBe("medium"); + }); + + test("falls back to high when xhigh requested on gpt-5-pro", () => { + // gpt-5-pro only supports high, so xhigh falls back to high + expect(enforceThinkingPolicy("openai:gpt-5-pro", "xhigh")).toBe("high"); + }); + }); }); // Note: Tests for invalid levels removed - TypeScript type system prevents invalid diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts index 4346d9272d..a39e34d056 100644 --- a/src/browser/utils/thinking/policy.ts +++ b/src/browser/utils/thinking/policy.ts @@ -24,26 +24,35 @@ export type ThinkingPolicy = readonly ThinkingLevel[]; * Returns the thinking policy for a given model. * * Rules: + * - openai:gpt-5.1-codex-max → ["off", "low", "medium", "high", "xhigh"] (5 levels including xhigh) * - openai:gpt-5-pro → ["high"] (only supported level) * - gemini-3 → ["low", "high"] (thinking level only) - * - default → ["off", "low", "medium", "high"] (all levels selectable) + * - default → ["off", "low", "medium", "high"] (standard 4 levels) * * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06). * Does NOT match gpt-5-pro-mini (uses negative lookahead). 
*/ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { - // Match "openai:" followed by optional whitespace and "gpt-5-pro" - // Allow version suffixes like "-2025-10-06" but NOT "-mini" or other text suffixes - if (/^openai:\s*gpt-5-pro(?!-[a-z])/.test(modelString)) { + // Normalize to be robust to provider prefixes, whitespace, and version suffixes + const normalized = modelString.trim().toLowerCase(); + const withoutPrefix = normalized.replace(/^[a-z0-9_-]+:\s*/, ""); + + // GPT-5.1-Codex-Max supports 5 reasoning levels including xhigh (Extra High) + if (withoutPrefix.startsWith("gpt-5.1-codex-max") || withoutPrefix.startsWith("codex-max")) { + return ["off", "low", "medium", "high", "xhigh"]; + } + + // gpt-5-pro (not mini) with optional version suffix + if (/^gpt-5-pro(?!-[a-z])/.test(withoutPrefix)) { return ["high"]; } // Gemini 3 Pro only supports "low" and "high" reasoning levels - if (modelString.includes("gemini-3")) { + if (withoutPrefix.includes("gemini-3")) { return ["low", "high"]; } - // Default policy: all levels selectable + // Default policy: standard 4 levels (xhigh only for codex-max) return ["off", "low", "medium", "high"]; } diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts index 77caa2adfd..e11cb7a9a0 100644 --- a/src/common/constants/knownModels.ts +++ b/src/common/constants/knownModels.ts @@ -70,6 +70,13 @@ const MODEL_DEFINITIONS = { providerModelId: "gpt-5.1-codex-mini", aliases: ["codex-mini"], }, + GPT_CODEX_MAX: { + provider: "openai", + providerModelId: "gpt-5.1-codex-max", + aliases: ["codex-max"], + warm: true, + tokenizerOverride: "openai/gpt-5", + }, GEMINI_3_PRO: { provider: "google", providerModelId: "gemini-3-pro-preview", diff --git a/src/common/orpc/schemas/stream.ts b/src/common/orpc/schemas/stream.ts index f8c8ff7550..9a2d6b092c 100644 --- a/src/common/orpc/schemas/stream.ts +++ b/src/common/orpc/schemas/stream.ts @@ -313,7 +313,7 @@ export const 
ToolPolicySchema = z.array(ToolPolicyFilterSchema).meta({ // SendMessage options export const SendMessageOptionsSchema = z.object({ editMessageId: z.string().optional(), - thinkingLevel: z.enum(["off", "low", "medium", "high"]).optional(), + thinkingLevel: z.enum(["off", "low", "medium", "high", "xhigh"]).optional(), model: z.string("No model specified"), toolPolicy: ToolPolicySchema.optional(), additionalSystemInstructions: z.string().optional(), diff --git a/src/common/orpc/schemas/telemetry.ts b/src/common/orpc/schemas/telemetry.ts index 6dec67c06c..f57156cf58 100644 --- a/src/common/orpc/schemas/telemetry.ts +++ b/src/common/orpc/schemas/telemetry.ts @@ -29,7 +29,7 @@ const FrontendPlatformInfoSchema = z.object({ }); // Thinking level enum (matches payload.ts TelemetryThinkingLevel) -const TelemetryThinkingLevelSchema = z.enum(["off", "low", "medium", "high"]); +const TelemetryThinkingLevelSchema = z.enum(["off", "low", "medium", "high", "xhigh"]); // Command type enum (matches payload.ts TelemetryCommandType) const TelemetryCommandTypeSchema = z.enum([ diff --git a/src/common/telemetry/payload.ts b/src/common/telemetry/payload.ts index 45c42415fc..d70aeff0af 100644 --- a/src/common/telemetry/payload.ts +++ b/src/common/telemetry/payload.ts @@ -86,7 +86,7 @@ export interface WorkspaceSwitchedPayload { /** * Thinking level for extended thinking feature */ -export type TelemetryThinkingLevel = "off" | "low" | "medium" | "high"; +export type TelemetryThinkingLevel = "off" | "low" | "medium" | "high" | "xhigh"; /** * Chat/AI interaction events diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts index f6283d067a..a30d258793 100644 --- a/src/common/types/thinking.ts +++ b/src/common/types/thinking.ts @@ -5,7 +5,7 @@ * different AI providers (Anthropic, OpenAI, etc.) 
*/ -export type ThinkingLevel = "off" | "low" | "medium" | "high"; +export type ThinkingLevel = "off" | "low" | "medium" | "high" | "xhigh"; /** * Active thinking levels (excludes "off") @@ -30,6 +30,7 @@ export const ANTHROPIC_THINKING_BUDGETS: Record = { low: 4000, medium: 10000, high: 20000, + xhigh: 20000, // Same as high - Anthropic doesn't support xhigh }; /** @@ -47,6 +48,7 @@ export const ANTHROPIC_EFFORT: Record low: "low", medium: "medium", high: "high", + xhigh: "high", // Fallback to high - Anthropic doesn't support xhigh }; /** @@ -66,6 +68,7 @@ export const OPENAI_REASONING_EFFORT: Record low: "low", medium: "medium", high: "high", + xhigh: "xhigh", // Extra High - only supported by gpt-5.1-codex-max }; /** @@ -83,6 +86,7 @@ export const GEMINI_THINKING_BUDGETS: Record = { low: 2048, medium: 8192, high: 16384, // Conservative max (some models go to 32k) + xhigh: 16384, // Same as high - Gemini doesn't support xhigh } as const; export const OPENROUTER_REASONING_EFFORT: Record< ThinkingLevel, @@ -92,4 +96,5 @@ export const OPENROUTER_REASONING_EFFORT: Record< low: "low", medium: "medium", high: "high", + xhigh: "high", // Fallback to high - OpenRouter doesn't support xhigh }; diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 202c5b8e4f..b1d3b19037 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -254,8 +254,11 @@ export function buildProviderOptions( }; if (isGemini3) { - // Gemini 3 uses thinkingLevel (low/high) - thinkingConfig.thinkingLevel = effectiveThinking === "medium" ? "low" : effectiveThinking; + // Gemini 3 uses thinkingLevel (low/high) - keep medium mapped to "low" as before; clamp unsupported xhigh to "high" + thinkingConfig.thinkingLevel = + effectiveThinking === "medium" ? "low" : effectiveThinking === "xhigh" + ? 
"high" + : effectiveThinking; } else { // Gemini 2.5 uses thinkingBudget const budget = GEMINI_THINKING_BUDGETS[effectiveThinking]; diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index 6e496c6ced..62f753e1d2 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -88,4 +88,21 @@ export const modelsExtra: Record = { supports_reasoning: true, supports_response_schema: true, }, + + // GPT-5.1-Codex-Max - Extended reasoning model with xhigh support + // Same pricing as gpt-5.1-codex: $1.25/M input, $10/M output + // Supports 5 reasoning levels: off, low, medium, high, xhigh + "gpt-5.1-codex-max": { + max_input_tokens: 272000, // Same as gpt-5.1-codex + max_output_tokens: 128000, // Same as gpt-5.1-codex + input_cost_per_token: 0.00000125, // $1.25 per million input tokens + output_cost_per_token: 0.00001, // $10 per million output tokens + litellm_provider: "openai", + mode: "chat", + supports_function_calling: true, + supports_vision: true, + supports_reasoning: true, + supports_response_schema: true, + supported_endpoints: ["/v1/responses"], + }, }; diff --git a/src/common/utils/tokens/models.json b/src/common/utils/tokens/models.json index 855e0ae542..cb51542836 100644 --- a/src/common/utils/tokens/models.json +++ b/src/common/utils/tokens/models.json @@ -1,24 +1,24 @@ { "sample_spec": { - "code_interpreter_cost_per_session": 0.0, - "computer_use_input_cost_per_1k_tokens": 0.0, - "computer_use_output_cost_per_1k_tokens": 0.0, + "code_interpreter_cost_per_session": 0, + "computer_use_input_cost_per_1k_tokens": 0, + "computer_use_output_cost_per_1k_tokens": 0, "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD", - "file_search_cost_per_1k_calls": 0.0, - "file_search_cost_per_gb_per_day": 0.0, - "input_cost_per_audio_token": 0.0, - "input_cost_per_token": 0.0, + "file_search_cost_per_1k_calls": 0, + "file_search_cost_per_gb_per_day": 0, + 
"input_cost_per_audio_token": 0, + "input_cost_per_token": 0, "litellm_provider": "one of https://docs.litellm.ai/docs/providers", "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens", "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens", "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.", "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank, search", - "output_cost_per_reasoning_token": 0.0, - "output_cost_per_token": 0.0, + "output_cost_per_reasoning_token": 0, + "output_cost_per_token": 0, "search_context_cost_per_query": { - "search_context_size_high": 0.0, - "search_context_size_low": 0.0, - "search_context_size_medium": 0.0 + "search_context_size_high": 0, + "search_context_size_low": 0, + "search_context_size_medium": 0 }, "supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], "supports_audio_input": true, @@ -31,7 +31,7 @@ "supports_system_messages": true, "supports_vision": true, "supports_web_search": true, - "vector_store_cost_per_gb_per_day": 0.0 + "vector_store_cost_per_gb_per_day": 0 }, "1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": { "litellm_provider": "bedrock", @@ -50,7 +50,7 @@ "input_cost_per_pixel": 1.9e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { "litellm_provider": "bedrock", @@ -63,7 +63,7 @@ "input_cost_per_pixel": 2.4414e-7, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { "litellm_provider": "bedrock", @@ -76,7 +76,7 @@ "input_cost_per_pixel": 6.86e-8, 
"litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "512-x-512/max-steps/stability.stable-diffusion-xl-v0": { "litellm_provider": "bedrock", @@ -86,31 +86,31 @@ "output_cost_per_image": 0.036 }, "ai21.j2-mid-v1": { - "input_cost_per_token": 1.25e-5, + "input_cost_per_token": 0.0000125, "litellm_provider": "bedrock", "max_input_tokens": 8191, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 1.25e-5 + "output_cost_per_token": 0.0000125 }, "ai21.j2-ultra-v1": { - "input_cost_per_token": 1.88e-5, + "input_cost_per_token": 0.0000188, "litellm_provider": "bedrock", "max_input_tokens": 8191, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 1.88e-5 + "output_cost_per_token": 0.0000188 }, "ai21.jamba-1-5-large-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6 + "output_cost_per_token": 0.000008 }, "ai21.jamba-1-5-mini-v1:0": { "input_cost_per_token": 2e-7, @@ -223,6 +223,12 @@ "source": "https://docs.aimlapi.com/", "supported_endpoints": ["/v1/images/generations"] }, + "amazon.nova-canvas-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 2600, + "mode": "image_generation", + "output_cost_per_image": 0.06 + }, "amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, "litellm_provider": "bedrock_converse", @@ -237,6 +243,70 @@ "supports_response_schema": true, "supports_vision": true }, + "amazon.nova-2-lite-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "apac.amazon.nova-2-lite-v1:0": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.00000275, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "eu.amazon.nova-2-lite-v1:0": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.00000275, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "us.amazon.nova-2-lite-v1:0": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.00000275, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "amazon.nova-micro-v1:0": { "input_cost_per_token": 3.5e-8, "litellm_provider": "bedrock_converse", @@ -256,7 +326,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.2e-6, + "output_cost_per_token": 0.0000032, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -265,7 +335,7 @@ }, "amazon.rerank-v1:0": { "input_cost_per_query": 
0.001, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "bedrock", "max_document_chunks_per_query": 100, "max_input_tokens": 32000, @@ -274,10 +344,10 @@ "max_tokens": 32000, "max_tokens_per_document_chunk": 512, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "amazon.titan-embed-image-v1": { - "input_cost_per_image": 6e-5, + "input_cost_per_image": 0.00006, "input_cost_per_token": 8e-7, "litellm_provider": "bedrock", "max_input_tokens": 128, @@ -286,7 +356,7 @@ "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1", "supports_embedding_image_input": true, @@ -298,7 +368,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536 }, "amazon.titan-embed-text-v2:0": { @@ -307,11 +377,11 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024 }, "amazon.titan-image-generator-v1": { - "input_cost_per_image": 0.0, + "input_cost_per_image": 0, "output_cost_per_image": 0.008, "output_cost_per_image_premium_image": 0.01, "output_cost_per_image_above_512_and_512_pixels": 0.01, @@ -320,7 +390,16 @@ "mode": "image_generation" }, "amazon.titan-image-generator-v2": { - "input_cost_per_image": 0.0, + "input_cost_per_image": 0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + 
"amazon.titan-image-generator-v2:0": { + "input_cost_per_image": 0, "output_cost_per_image": 0.008, "output_cost_per_image_premium_image": 0.01, "output_cost_per_image_above_1024_and_1024_pixels": 0.01, @@ -329,18 +408,18 @@ "mode": "image_generation" }, "twelvelabs.marengo-embed-2-7-v1:0": { - "input_cost_per_token": 7e-5, + "input_cost_per_token": 0.00007, "litellm_provider": "bedrock", "max_input_tokens": 77, "max_tokens": 77, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "supports_embedding_image_input": true, "supports_image_input": true }, "us.twelvelabs.marengo-embed-2-7-v1:0": { - "input_cost_per_token": 7e-5, + "input_cost_per_token": 0.00007, "input_cost_per_video_per_second": 0.0007, "input_cost_per_audio_per_second": 0.00014, "input_cost_per_image": 0.0001, @@ -348,13 +427,13 @@ "max_input_tokens": 77, "max_tokens": 77, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "supports_embedding_image_input": true, "supports_image_input": true }, "eu.twelvelabs.marengo-embed-2-7-v1:0": { - "input_cost_per_token": 7e-5, + "input_cost_per_token": 0.00007, "input_cost_per_video_per_second": 0.0007, "input_cost_per_audio_per_second": 0.00014, "input_cost_per_image": 0.0001, @@ -362,40 +441,40 @@ "max_input_tokens": 77, "max_tokens": 77, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "supports_embedding_image_input": true, "supports_image_input": true }, "twelvelabs.pegasus-1-2-v1:0": { "input_cost_per_video_per_second": 0.00049, - "output_cost_per_token": 7.5e-6, + "output_cost_per_token": 0.0000075, "litellm_provider": "bedrock", "mode": "chat", "supports_video_input": true }, "us.twelvelabs.pegasus-1-2-v1:0": { "input_cost_per_video_per_second": 0.00049, - "output_cost_per_token": 7.5e-6, + "output_cost_per_token": 0.0000075, "litellm_provider": "bedrock", "mode": "chat", 
"supports_video_input": true }, "eu.twelvelabs.pegasus-1-2-v1:0": { "input_cost_per_video_per_second": 0.00049, - "output_cost_per_token": 7.5e-6, + "output_cost_per_token": 0.0000075, "litellm_provider": "bedrock", "mode": "chat", "supports_video_input": true }, "amazon.titan-text-express-v1": { - "input_cost_per_token": 1.3e-6, + "input_cost_per_token": 0.0000013, "litellm_provider": "bedrock", "max_input_tokens": 42000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1.7e-6 + "output_cost_per_token": 0.0000017 }, "amazon.titan-text-lite-v1": { "input_cost_per_token": 3e-7, @@ -413,10 +492,10 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "anthropic.claude-3-5-haiku-20241022-v1:0": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 8e-7, "litellm_provider": "bedrock", @@ -424,7 +503,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -433,15 +512,15 @@ "supports_tool_choice": true }, "anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost": 0.00000125, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -455,15 +534,15 @@ 
"tool_use_system_prompt_tokens": 346 }, "anthropic.claude-haiku-4-5@20251001": { - "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost": 0.00000125, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -477,13 +556,13 @@ "tool_use_system_prompt_tokens": 346 }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -491,15 +570,15 @@ "supports_vision": true }, "anthropic.claude-3-5-sonnet-20241022-v2:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -510,15 +589,15 @@ "supports_vision": true }, "anthropic.claude-3-7-sonnet-20240620-v1:0": { - "cache_creation_input_token_cost": 4.5e-6, + "cache_creation_input_token_cost": 0.0000045, "cache_read_input_token_cost": 3.6e-7, - "input_cost_per_token": 3.6e-6, + 
"input_cost_per_token": 0.0000036, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.8e-5, + "output_cost_per_token": 0.000018, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -530,15 +609,15 @@ "supports_vision": true }, "anthropic.claude-3-7-sonnet-20250219-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -556,7 +635,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -564,26 +643,26 @@ "supports_vision": true }, "anthropic.claude-3-opus-20240229-v1:0": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true }, "anthropic.claude-3-sonnet-20240229-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, 
"supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -597,19 +676,19 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, "supports_tool_choice": true }, "anthropic.claude-opus-4-1-20250805-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -627,15 +706,41 @@ "tool_use_system_prompt_tokens": 159 }, "anthropic.claude-opus-4-20250514-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-opus-4-5-20251101-v1:0": { + 
"cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -653,19 +758,19 @@ "tool_use_system_prompt_tokens": 159 }, "anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -683,19 +788,19 @@ "tool_use_system_prompt_tokens": 159 }, "anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 
0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -713,22 +818,22 @@ "tool_use_system_prompt_tokens": 159 }, "anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5 + "output_cost_per_token": 0.000024 }, "anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "anyscale/HuggingFaceH4/zephyr-7b-beta": { @@ -741,22 +846,22 @@ "output_cost_per_token": 1.5e-7 }, "anyscale/codellama/CodeLlama-34b-Instruct-hf": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anyscale", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "anyscale/codellama/CodeLlama-70b-Instruct-hf": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anyscale", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/codellama-CodeLlama-70b-Instruct-hf" }, "anyscale/google/gemma-7b-it": { @@ 
-779,13 +884,13 @@ "output_cost_per_token": 2.5e-7 }, "anyscale/meta-llama/Llama-2-70b-chat-hf": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anyscale", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "anyscale/meta-llama/Llama-2-7b-chat-hf": { "input_cost_per_token": 1.5e-7, @@ -797,13 +902,13 @@ "output_cost_per_token": 1.5e-7 }, "anyscale/meta-llama/Meta-Llama-3-70B-Instruct": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anyscale", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-70B-Instruct" }, "anyscale/meta-llama/Meta-Llama-3-8B-Instruct": { @@ -882,7 +987,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.36e-6, + "output_cost_per_token": 0.00000336, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -890,13 +995,13 @@ "supports_vision": true }, "apac.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -904,15 +1009,15 @@ "supports_vision": true }, "apac.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, 
"litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -929,7 +1034,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -937,15 +1042,15 @@ "supports_vision": true }, "apac.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -959,13 +1064,13 @@ "tool_use_system_prompt_tokens": 346 }, "apac.anthropic.claude-3-sonnet-20240229-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -973,19 +1078,19 @@ "supports_vision": true }, "apac.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - 
"input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -1003,31 +1108,31 @@ "tool_use_system_prompt_tokens": 159 }, "assemblyai/best": { - "input_cost_per_second": 3.333e-5, + "input_cost_per_second": 0.00003333, "litellm_provider": "assemblyai", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "assemblyai/nano": { "input_cost_per_second": 0.00010278, "litellm_provider": "assemblyai", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "au.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 4.125e-6, + "cache_creation_input_token_cost": 0.000004125, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 3.3e-6, - "input_cost_per_token_above_200k_tokens": 6.6e-6, - "output_cost_per_token_above_200k_tokens": 2.475e-5, - "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, 
"mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -1050,17 +1155,17 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/codex-mini": { "cache_read_input_token_cost": 3.75e-7, - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1075,23 +1180,77 @@ "supports_vision": true }, "azure/command-r-plus": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true }, + "azure/claude-haiku-4-5": { + "input_cost_per_token": 0.000001, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/claude-opus-4-1": { + "input_cost_per_token": 0.000015, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000075, + "supports_assistant_prefill": true, + 
"supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/claude-sonnet-4-5": { + "input_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "azure/computer-use-preview": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1111,14 +1270,14 @@ }, "azure/eu/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", - "cache_read_input_token_cost": 1.375e-6, - "input_cost_per_token": 2.75e-6, + "cache_read_input_token_cost": 0.000001375, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1128,14 +1287,14 @@ }, "azure/eu/gpt-4o-2024-11-20": { "deprecation_date": "2026-03-01", - "cache_creation_input_token_cost": 1.38e-6, - "input_cost_per_token": 2.75e-6, + "cache_creation_input_token_cost": 
0.00000138, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_response_schema": true, @@ -1161,15 +1320,15 @@ "azure/eu/gpt-4o-mini-realtime-preview-2024-12-17": { "cache_creation_input_audio_token_cost": 3.3e-7, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_audio_token": 1.1e-5, + "input_cost_per_audio_token": 0.000011, "input_cost_per_token": 6.6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2.2e-5, - "output_cost_per_token": 2.64e-6, + "output_cost_per_audio_token": 0.000022, + "output_cost_per_token": 0.00000264, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1178,17 +1337,17 @@ "supports_tool_choice": true }, "azure/eu/gpt-4o-realtime-preview-2024-10-01": { - "cache_creation_input_audio_token_cost": 2.2e-5, - "cache_read_input_token_cost": 2.75e-6, + "cache_creation_input_audio_token_cost": 0.000022, + "cache_read_input_token_cost": 0.00000275, "input_cost_per_audio_token": 0.00011, - "input_cost_per_token": 5.5e-6, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", "output_cost_per_audio_token": 0.00022, - "output_cost_per_token": 2.2e-5, + "output_cost_per_token": 0.000022, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1197,17 +1356,17 @@ "supports_tool_choice": true }, "azure/eu/gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_audio_token_cost": 2.5e-6, - "cache_read_input_token_cost": 2.75e-6, - "input_cost_per_audio_token": 4.4e-5, - 
"input_cost_per_token": 5.5e-6, + "cache_read_input_audio_token_cost": 0.0000025, + "cache_read_input_token_cost": 0.00000275, + "input_cost_per_audio_token": 0.000044, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2.2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.000022, "supported_modalities": ["text", "audio"], "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, @@ -1219,13 +1378,13 @@ }, "azure/eu/gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.375e-7, - "input_cost_per_token": 1.375e-6, + "input_cost_per_token": 0.000001375, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1248,7 +1407,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.2e-6, + "output_cost_per_token": 0.0000022, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1265,13 +1424,13 @@ }, "azure/eu/gpt-5.1": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ 
-1288,13 +1447,13 @@ }, "azure/eu/gpt-5.1-chat": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -1311,13 +1470,13 @@ }, "azure/eu/gpt-5.1-codex": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1340,7 +1499,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2.2e-6, + "output_cost_per_token": 0.0000022, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1379,14 +1538,14 @@ "supports_vision": true }, "azure/eu/o1-2024-12-17": { - "cache_read_input_token_cost": 8.25e-6, - "input_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 0.00000825, + "input_cost_per_token": 0.0000165, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6.6e-5, + "output_cost_per_token": 0.000066, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1395,29 +1554,29 @@ }, "azure/eu/o1-mini-2024-09-12": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, 
+ "input_cost_per_token": 0.00000121, "input_cost_per_token_batches": 6.05e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.84e-6, - "output_cost_per_token_batches": 2.42e-6, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, "supports_vision": false }, "azure/eu/o1-preview-2024-09-12": { - "cache_read_input_token_cost": 8.25e-6, - "input_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 0.00000825, + "input_cost_per_token": 0.0000165, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6.6e-5, + "output_cost_per_token": 0.000066, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1425,30 +1584,30 @@ }, "azure/eu/o3-mini-2025-01-31": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "input_cost_per_token_batches": 6.05e-7, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.84e-6, - "output_cost_per_token_batches": 2.42e-6, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": false }, "azure/global-standard/gpt-4o-2024-08-06": { - "cache_read_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, "deprecation_date": "2026-02-27", - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - 
"output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1457,15 +1616,15 @@ "supports_vision": true }, "azure/global-standard/gpt-4o-2024-11-20": { - "cache_read_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, "deprecation_date": "2026-03-01", - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_response_schema": true, @@ -1488,14 +1647,14 @@ }, "azure/global/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1505,14 +1664,14 @@ }, "azure/global/gpt-4o-2024-11-20": { "deprecation_date": "2026-03-01", - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1522,13 +1681,13 @@ }, "azure/global/gpt-5.1": { "cache_read_input_token_cost": 1.25e-7, - 
"input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -1545,13 +1704,13 @@ }, "azure/global/gpt-5.1-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -1568,13 +1727,13 @@ }, "azure/global/gpt-5.1-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1597,7 +1756,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1619,7 +1778,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_tool_choice": true }, @@ -1631,18 
+1790,18 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-3.5-turbo-instruct-0914": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure_text", "max_input_tokens": 4097, "max_tokens": 4097, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "azure/gpt-35-turbo": { "input_cost_per_token": 5e-7, @@ -1651,7 +1810,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_tool_choice": true }, @@ -1663,7 +1822,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -1676,122 +1835,122 @@ "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-35-turbo-0613": { "deprecation_date": "2025-02-13", - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure", "max_input_tokens": 4097, "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-35-turbo-1106": { "deprecation_date": "2025-03-31", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "azure", "max_input_tokens": 16384, "max_output_tokens": 
4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-35-turbo-16k": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_tool_choice": true }, "azure/gpt-35-turbo-16k-0613": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_function_calling": true, "supports_tool_choice": true }, "azure/gpt-35-turbo-instruct": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure_text", "max_input_tokens": 4097, "max_tokens": 4097, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "azure/gpt-35-turbo-instruct-0914": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure_text", "max_input_tokens": 4097, "max_tokens": 4097, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "azure/gpt-4": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "azure", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-0125-preview": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, 
"mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-0613": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "azure", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-1106-preview": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-32k": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "azure", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -1801,7 +1960,7 @@ "supports_tool_choice": true }, "azure/gpt-4-32k-0613": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "azure", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -1811,52 +1970,52 @@ "supports_tool_choice": true }, "azure/gpt-4-turbo": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-turbo-2024-04-09": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": 
"chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-4-turbo-vision-preview": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-4.1": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "azure", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1873,15 +2032,15 @@ "azure/gpt-4.1-2025-04-14": { "deprecation_date": "2026-11-04", "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "azure", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -1904,7 +2063,7 @@ "max_output_tokens": 32768, "max_tokens": 
32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], @@ -1929,7 +2088,7 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], @@ -1992,16 +2151,16 @@ "supports_vision": true }, "azure/gpt-4.5-preview": { - "cache_read_input_token_cost": 3.75e-5, - "input_cost_per_token": 7.5e-5, - "input_cost_per_token_batches": 3.75e-5, + "cache_read_input_token_cost": 0.0000375, + "input_cost_per_token": 0.000075, + "input_cost_per_token_batches": 0.0000375, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00015, - "output_cost_per_token_batches": 7.5e-5, + "output_cost_per_token_batches": 0.000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2011,14 +2170,14 @@ "supports_vision": true }, "azure/gpt-4o": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2027,13 +2186,13 @@ "supports_vision": true }, "azure/gpt-4o-2024-05-13": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "azure", "max_input_tokens": 128000, 
"max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2042,14 +2201,14 @@ }, "azure/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2059,14 +2218,14 @@ }, "azure/gpt-4o-2024-11-20": { "deprecation_date": "2026-03-01", - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.75e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2075,15 +2234,15 @@ "supports_vision": true }, "azure/gpt-audio-2025-08-28": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text", "audio"], "supported_output_modalities": ["text", 
"audio"], @@ -2098,15 +2257,15 @@ "supports_vision": false }, "azure/gpt-audio-mini-2025-10-06": { - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text", "audio"], "supported_output_modalities": ["text", "audio"], @@ -2121,15 +2280,15 @@ "supports_vision": false }, "azure/gpt-4o-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text", "audio"], "supported_output_modalities": ["text", "audio"], @@ -2176,15 +2335,15 @@ "supports_vision": true }, "azure/gpt-4o-mini-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text", "audio"], "supported_output_modalities": ["text", "audio"], @@ -2201,15 +2360,15 @@ 
"azure/gpt-4o-mini-realtime-preview-2024-12-17": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 3e-7, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2218,18 +2377,18 @@ "supports_tool_choice": true }, "azure/gpt-realtime-2025-08-28": { - "cache_creation_input_audio_token_cost": 4e-6, - "cache_read_input_token_cost": 4e-6, - "input_cost_per_audio_token": 3.2e-5, - "input_cost_per_image": 5e-6, - "input_cost_per_token": 4e-6, + "cache_creation_input_audio_token_cost": 0.000004, + "cache_read_input_token_cost": 0.000004, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, "litellm_provider": "azure", "max_input_tokens": 32000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 6.4e-5, - "output_cost_per_token": 1.6e-5, + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.000016, "supported_endpoints": ["/v1/realtime"], "supported_modalities": ["text", "image", "audio"], "supported_output_modalities": ["text", "audio"], @@ -2243,7 +2402,7 @@ "azure/gpt-realtime-mini-2025-10-06": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 6e-8, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_image": 8e-7, "input_cost_per_token": 6e-7, "litellm_provider": "azure", @@ -2251,8 +2410,8 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + 
"output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supported_endpoints": ["/v1/realtime"], "supported_modalities": ["text", "image", "audio"], "supported_output_modalities": ["text", "audio"], @@ -2264,38 +2423,38 @@ "supports_tool_choice": true }, "azure/gpt-4o-mini-transcribe": { - "input_cost_per_audio_token": 3e-6, - "input_cost_per_token": 1.25e-6, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-4o-mini-tts": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "mode": "audio_speech", - "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/audio/speech"], "supported_modalities": ["text", "audio"], "supported_output_modalities": ["audio"] }, "azure/gpt-4o-realtime-preview-2024-10-01": { - "cache_creation_input_audio_token_cost": 2e-5, - "cache_read_input_token_cost": 2.5e-6, + "cache_creation_input_audio_token_cost": 0.00002, + "cache_read_input_token_cost": 0.0000025, "input_cost_per_audio_token": 0.0001, - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", "output_cost_per_audio_token": 0.0002, - "output_cost_per_token": 2e-5, + "output_cost_per_token": 0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2304,16 +2463,16 @@ "supports_tool_choice": true }, "azure/gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_token_cost": 2.5e-6, - 
"input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.000005, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00002, "supported_modalities": ["text", "audio"], "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, @@ -2324,37 +2483,37 @@ "supports_tool_choice": true }, "azure/gpt-4o-transcribe": { - "input_cost_per_audio_token": 6e-6, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.000006, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-4o-transcribe-diarize": { - "input_cost_per_audio_token": 6e-6, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.000006, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-5.1-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + 
"output_cost_per_token_priority": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -2373,15 +2532,15 @@ "azure/gpt-5.1-chat-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -2399,15 +2558,15 @@ "azure/gpt-5.1-codex-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2432,8 +2591,8 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2e-6, - "output_cost_per_token_priority": 3.6e-6, + "output_cost_per_token": 0.000002, + "output_cost_per_token_priority": 0.0000036, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", 
"image"], "supported_output_modalities": ["text"], @@ -2450,13 +2609,13 @@ }, "azure/gpt-5": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2473,13 +2632,13 @@ }, "azure/gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2496,13 +2655,13 @@ }, "azure/gpt-5-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], @@ -2520,13 +2679,13 @@ }, "azure/gpt-5-chat-latest": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - 
"output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2543,13 +2702,13 @@ }, "azure/gpt-5-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2572,7 +2731,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2595,7 +2754,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2657,7 +2816,7 @@ "supports_vision": true }, "azure/gpt-5-pro": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, @@ -2680,13 +2839,13 @@ }, "azure/gpt-5.1": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", 
"/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -2703,13 +2862,13 @@ }, "azure/gpt-5.1-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -2726,13 +2885,36 @@ }, "azure/gpt-5.1-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "azure", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2755,7 +2937,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - 
"output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -2774,187 +2956,187 @@ "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/hd/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 6.539e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/hd/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 6.539e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/high/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.59263611e-7, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, 
"azure/low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/gpt-image-1-mini": { "input_cost_per_pixel": 8.0566406e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": 
["/v1/images/generations"] }, "azure/low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0345052083e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 7.9752604167e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.1575520833e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "azure/mistral-large-2402": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "azure", "max_input_tokens": 32000, 
"max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "azure/mistral-large-latest": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "azure", "max_input_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "azure/o1": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2963,14 +3145,14 @@ "supports_vision": true }, "azure/o1-2024-12-17": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2980,13 +3162,13 @@ }, "azure/o1-mini": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.84e-6, + "output_cost_per_token": 0.00000484, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2995,13 +3177,13 @@ }, "azure/o1-mini-2024-09-12": { 
"cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3009,14 +3191,14 @@ "supports_vision": false }, "azure/o1-preview": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3024,14 +3206,14 @@ "supports_vision": false }, "azure/o1-preview-2024-09-12": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -3041,13 +3223,13 @@ }, "azure/o3": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ 
-3062,13 +3244,13 @@ "azure/o3-2025-04-16": { "deprecation_date": "2026-04-16", "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3081,14 +3263,14 @@ "supports_vision": true }, "azure/o3-deep-research": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_token": 1e-5, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 4e-5, + "output_cost_per_token": 0.00004, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3105,13 +3287,13 @@ }, "azure/o3-mini": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, @@ -3120,28 +3302,28 @@ }, "azure/o3-mini-2025-01-31": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_prompt_caching": true, 
"supports_reasoning": true, "supports_tool_choice": true, "supports_vision": false }, "azure/o3-pro": { - "input_cost_per_token": 2e-5, - "input_cost_per_token_batches": 1e-5, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-5, - "output_cost_per_token_batches": 4e-5, + "output_cost_per_token": 0.00008, + "output_cost_per_token_batches": 0.00004, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3154,15 +3336,15 @@ "supports_vision": true }, "azure/o3-pro-2025-06-10": { - "input_cost_per_token": 2e-5, - "input_cost_per_token_batches": 1e-5, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-5, - "output_cost_per_token_batches": 4e-5, + "output_cost_per_token": 0.00008, + "output_cost_per_token_batches": 0.00004, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3176,13 +3358,13 @@ }, "azure/o4-mini": { "cache_read_input_token_cost": 2.75e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3196,13 +3378,13 @@ }, "azure/o4-mini-2025-04-16": { "cache_read_input_token_cost": 2.75e-7, - 
"input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3212,28 +3394,28 @@ "supports_vision": true }, "azure/standard/1024-x-1024/dall-e-2": { - "input_cost_per_pixel": 0.0, + "input_cost_per_pixel": 0, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/standard/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 3.81469e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/standard/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 4.359e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/standard/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 4.359e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/text-embedding-3-large": { "input_cost_per_token": 1.3e-7, @@ -3241,7 +3423,7 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/text-embedding-3-small": { "deprecation_date": "2026-04-30", @@ -3250,7 +3432,7 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/text-embedding-ada-002": { "input_cost_per_token": 1e-7, @@ -3258,42 +3440,42 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/speech/azure-tts": { - "input_cost_per_character": 15e-6, + "input_cost_per_character": 0.000015, 
"litellm_provider": "azure", "mode": "audio_speech", "source": "https://azure.microsoft.com/en-us/pricing/calculator/" }, "azure/speech/azure-tts-hd": { - "input_cost_per_character": 30e-6, + "input_cost_per_character": 0.00003, "litellm_provider": "azure", "mode": "audio_speech", "source": "https://azure.microsoft.com/en-us/pricing/calculator/" }, "azure/tts-1": { - "input_cost_per_character": 1.5e-5, + "input_cost_per_character": 0.000015, "litellm_provider": "azure", "mode": "audio_speech" }, "azure/tts-1-hd": { - "input_cost_per_character": 3e-5, + "input_cost_per_character": 0.00003, "litellm_provider": "azure", "mode": "audio_speech" }, "azure/us/gpt-4.1-2025-04-14": { "deprecation_date": "2026-11-04", "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 2.2e-6, - "input_cost_per_token_batches": 1.1e-6, + "input_cost_per_token": 0.0000022, + "input_cost_per_token_batches": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8.8e-6, - "output_cost_per_token_batches": 4.4e-6, + "output_cost_per_token": 0.0000088, + "output_cost_per_token_batches": 0.0000044, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3317,7 +3499,7 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.76e-6, + "output_cost_per_token": 0.00000176, "output_cost_per_token_batches": 8.8e-7, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], @@ -3358,14 +3540,14 @@ }, "azure/us/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", - "cache_read_input_token_cost": 1.375e-6, - "input_cost_per_token": 2.75e-6, + "cache_read_input_token_cost": 0.000001375, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, 
"max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3375,14 +3557,14 @@ }, "azure/us/gpt-4o-2024-11-20": { "deprecation_date": "2026-03-01", - "cache_creation_input_token_cost": 1.38e-6, - "input_cost_per_token": 2.75e-6, + "cache_creation_input_token_cost": 0.00000138, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_response_schema": true, @@ -3408,15 +3590,15 @@ "azure/us/gpt-4o-mini-realtime-preview-2024-12-17": { "cache_creation_input_audio_token_cost": 3.3e-7, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_audio_token": 1.1e-5, + "input_cost_per_audio_token": 0.000011, "input_cost_per_token": 6.6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2.2e-5, - "output_cost_per_token": 2.64e-6, + "output_cost_per_audio_token": 0.000022, + "output_cost_per_token": 0.00000264, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -3425,17 +3607,17 @@ "supports_tool_choice": true }, "azure/us/gpt-4o-realtime-preview-2024-10-01": { - "cache_creation_input_audio_token_cost": 2.2e-5, - "cache_read_input_token_cost": 2.75e-6, + "cache_creation_input_audio_token_cost": 0.000022, + "cache_read_input_token_cost": 0.00000275, "input_cost_per_audio_token": 0.00011, - "input_cost_per_token": 5.5e-6, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", 
"output_cost_per_audio_token": 0.00022, - "output_cost_per_token": 2.2e-5, + "output_cost_per_token": 0.000022, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -3444,17 +3626,17 @@ "supports_tool_choice": true }, "azure/us/gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_audio_token_cost": 2.5e-6, - "cache_read_input_token_cost": 2.75e-6, - "input_cost_per_audio_token": 4.4e-5, - "input_cost_per_token": 5.5e-6, + "cache_read_input_audio_token_cost": 0.0000025, + "cache_read_input_token_cost": 0.00000275, + "input_cost_per_audio_token": 0.000044, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2.2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.000022, "supported_modalities": ["text", "audio"], "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, @@ -3466,13 +3648,13 @@ }, "azure/us/gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.375e-7, - "input_cost_per_token": 1.375e-6, + "input_cost_per_token": 0.000001375, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3495,7 +3677,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.2e-6, + "output_cost_per_token": 0.0000022, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3535,13 +3717,13 @@ }, "azure/us/gpt-5.1": { "cache_read_input_token_cost": 1.4e-7, - 
"input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -3558,13 +3740,13 @@ }, "azure/us/gpt-5.1-chat": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -3581,13 +3763,13 @@ }, "azure/us/gpt-5.1-codex": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3610,7 +3792,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2.2e-6, + "output_cost_per_token": 0.0000022, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -3626,14 +3808,14 @@ "supports_vision": true }, "azure/us/o1-2024-12-17": { - "cache_read_input_token_cost": 8.25e-6, - "input_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 0.00000825, + "input_cost_per_token": 0.0000165, 
"litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6.6e-5, + "output_cost_per_token": 0.000066, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3642,29 +3824,29 @@ }, "azure/us/o1-mini-2024-09-12": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "input_cost_per_token_batches": 6.05e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.84e-6, - "output_cost_per_token_batches": 2.42e-6, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, "supports_vision": false }, "azure/us/o1-preview-2024-09-12": { - "cache_read_input_token_cost": 8.25e-6, - "input_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 0.00000825, + "input_cost_per_token": 0.0000165, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6.6e-5, + "output_cost_per_token": 0.000066, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3673,13 +3855,13 @@ "azure/us/o3-2025-04-16": { "deprecation_date": "2026-04-16", "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 2.2e-6, + "input_cost_per_token": 0.0000022, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8.8e-6, + "output_cost_per_token": 0.0000088, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": 
["text"], @@ -3693,15 +3875,15 @@ }, "azure/us/o3-mini-2025-01-31": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "input_cost_per_token_batches": 6.05e-7, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.84e-6, - "output_cost_per_token_batches": 2.42e-6, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, @@ -3709,13 +3891,13 @@ }, "azure/us/o4-mini-2025-04-16": { "cache_read_input_token_cost": 3.1e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.84e-6, + "output_cost_per_token": 0.00000484, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3736,7 +3918,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", "supports_embedding_image_input": true @@ -3747,7 +3929,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", "supports_embedding_image_input": true @@ -3780,13 +3962,13 @@ "supports_vision": true }, "azure_ai/Llama-3.2-90B-Vision-Instruct": { - "input_cost_per_token": 2.04e-6, + "input_cost_per_token": 0.00000204, "litellm_provider": "azure_ai", 
"max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 2.04e-6, + "output_cost_per_token": 0.00000204, "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview", "supports_function_calling": true, "supports_tool_choice": true, @@ -3805,7 +3987,7 @@ "supports_tool_choice": true }, "azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8": { - "input_cost_per_token": 1.41e-6, + "input_cost_per_token": 0.00000141, "litellm_provider": "azure_ai", "max_input_tokens": 1000000, "max_output_tokens": 16384, @@ -3831,7 +4013,7 @@ "supports_vision": true }, "azure_ai/Meta-Llama-3-70B-Instruct": { - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure_ai", "max_input_tokens": 8192, "max_output_tokens": 2048, @@ -3841,24 +4023,24 @@ "supports_tool_choice": true }, "azure_ai/Meta-Llama-3.1-405B-Instruct": { - "input_cost_per_token": 5.33e-6, + "input_cost_per_token": 0.00000533, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 1.6e-5, + "output_cost_per_token": 0.000016, "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice", "supports_tool_choice": true }, "azure_ai/Meta-Llama-3.1-70B-Instruct": { - "input_cost_per_token": 2.68e-6, + "input_cost_per_token": 0.00000268, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 3.54e-6, + "output_cost_per_token": 0.00000354, "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice", "supports_tool_choice": true }, @@ -4006,7 +4188,7 @@ "supports_function_calling": true }, "azure_ai/Phi-4-multimodal-instruct": { 
- "input_cost_per_audio_token": 4e-6, + "input_cost_per_audio_token": 0.000004, "input_cost_per_token": 8e-8, "litellm_provider": "azure_ai", "max_input_tokens": 131072, @@ -4045,108 +4227,108 @@ }, "azure_ai/mistral-document-ai-2505": { "litellm_provider": "azure_ai", - "ocr_cost_per_page": 3e-3, + "ocr_cost_per_page": 0.003, "mode": "ocr", "supported_endpoints": ["/v1/ocr"], "source": "https://devblogs.microsoft.com/foundry/whats-new-in-azure-ai-foundry-august-2025/#mistral-document-ai-(ocr)-%E2%80%94-serverless-in-foundry" }, "azure_ai/doc-intelligence/prebuilt-read": { "litellm_provider": "azure_ai", - "ocr_cost_per_page": 1.5e-3, + "ocr_cost_per_page": 0.0015, "mode": "ocr", "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-layout": { "litellm_provider": "azure_ai", - "ocr_cost_per_page": 1e-2, + "ocr_cost_per_page": 0.01, "mode": "ocr", "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-document": { "litellm_provider": "azure_ai", - "ocr_cost_per_page": 1e-2, + "ocr_cost_per_page": 0.01, "mode": "ocr", "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/MAI-DS-R1": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", "supports_reasoning": true, "supports_tool_choice": true }, "azure_ai/cohere-rerank-v3-english": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "azure_ai", 
"max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure_ai/cohere-rerank-v3-multilingual": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "azure_ai", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure_ai/cohere-rerank-v3.5": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "azure_ai", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure_ai/deepseek-r1": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367", "supports_reasoning": true, "supports_tool_choice": true }, "azure_ai/deepseek-v3": { - "input_cost_per_token": 1.14e-6, + "input_cost_per_token": 0.00000114, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.56e-6, + "output_cost_per_token": 0.00000456, "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438", "supports_tool_choice": true }, "azure_ai/deepseek-v3-0324": { - "input_cost_per_token": 1.14e-6, + "input_cost_per_token": 0.00000114, "litellm_provider": "azure_ai", "max_input_tokens": 128000, 
"max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.56e-6, + "output_cost_per_token": 0.00000456, "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438", "supports_function_calling": true, "supports_tool_choice": true @@ -4157,7 +4339,7 @@ "max_input_tokens": 128000, "max_tokens": 128000, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 3072, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", "supported_endpoints": ["/v1/embeddings"], @@ -4165,13 +4347,13 @@ "supports_embedding_image_input": true }, "azure_ai/global/grok-3": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", "supports_function_calling": true, "supports_response_schema": false, @@ -4185,7 +4367,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.27e-6, + "output_cost_per_token": 0.00000127, "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", "supports_function_calling": true, "supports_reasoning": true, @@ -4194,13 +4376,13 @@ "supports_web_search": true }, "azure_ai/grok-3": { - "input_cost_per_token": 3.3e-6, + "input_cost_per_token": 0.0000033, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "source": 
"https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", "supports_function_calling": true, "supports_response_schema": false, @@ -4214,7 +4396,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.38e-6, + "output_cost_per_token": 0.00000138, "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", "supports_function_calling": true, "supports_reasoning": true, @@ -4223,13 +4405,13 @@ "supports_web_search": true }, "azure_ai/grok-4": { - "input_cost_per_token": 5.5e-6, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.75e-5, + "output_cost_per_token": 0.0000275, "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", "supports_function_calling": true, "supports_response_schema": true, @@ -4237,8 +4419,8 @@ "supports_web_search": true }, "azure_ai/grok-4-fast-non-reasoning": { - "input_cost_per_token": 0.43e-6, - "output_cost_per_token": 1.73e-6, + "input_cost_per_token": 4.3e-7, + "output_cost_per_token": 0.00000173, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, @@ -4250,8 +4432,8 @@ "supports_web_search": true }, "azure_ai/grok-4-fast-reasoning": { - "input_cost_per_token": 0.43e-6, - "output_cost_per_token": 1.73e-6, + "input_cost_per_token": 4.3e-7, + "output_cost_per_token": 0.00000173, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, @@ -4264,13 +4446,13 @@ "supports_web_search": true }, "azure_ai/grok-code-fast-1": { - "input_cost_per_token": 3.5e-6, + "input_cost_per_token": 0.0000035, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": 
"chat", - "output_cost_per_token": 1.75e-5, + "output_cost_per_token": 0.0000175, "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", "supports_function_calling": true, "supports_response_schema": true, @@ -4310,40 +4492,53 @@ "supports_tool_choice": true }, "azure_ai/mistral-large": { - "input_cost_per_token": 4e-6, + "input_cost_per_token": 0.000004, "litellm_provider": "azure_ai", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_function_calling": true, "supports_tool_choice": true }, "azure_ai/mistral-large-2407": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", "supports_function_calling": true, "supports_tool_choice": true }, "azure_ai/mistral-large-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", "supports_function_calling": true, "supports_tool_choice": true }, + "azure_ai/mistral-large-3": { + "input_cost_per_token": 5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 256000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": 
"https://azure.microsoft.com/en-us/blog/introducing-mistral-large-3-in-microsoft-foundry-open-capable-and-ready-for-production-workloads/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, "azure_ai/mistral-medium-2505": { "input_cost_per_token": 4e-7, "litellm_provider": "azure_ai", @@ -4351,7 +4546,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true @@ -4368,24 +4563,24 @@ "supports_function_calling": true }, "azure_ai/mistral-small": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "azure_ai", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true }, "azure_ai/mistral-small-2503": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true @@ -4498,43 +4693,43 @@ "supports_tool_choice": true }, "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { - "input_cost_per_token": 2.23e-6, + "input_cost_per_token": 0.00000223, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 7.55e-6, + "output_cost_per_token": 0.00000755, "supports_tool_choice": true }, "bedrock/ap-northeast-1/anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, 
"max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/ap-northeast-1/anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 3.18e-6, + "input_cost_per_token": 0.00000318, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.2e-6 + "output_cost_per_token": 0.0000042 }, "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3.6e-7, @@ -4546,13 +4741,13 @@ "output_cost_per_token": 7.2e-7 }, "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 3.05e-6, + "input_cost_per_token": 0.00000305, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.03e-6 + "output_cost_per_token": 0.00000403 }, "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3.5e-7, @@ -4622,42 +4817,42 @@ "supports_tool_choice": true }, "bedrock/eu-central-1/anthropic.claude-instant-v1": { - "input_cost_per_token": 2.48e-6, + "input_cost_per_token": 0.00000248, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 8.38e-6, + "output_cost_per_token": 0.00000838, "supports_tool_choice": true }, "bedrock/eu-central-1/anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, 
"max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5 + "output_cost_per_token": 0.000024 }, "bedrock/eu-central-1/anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.86e-6, + "input_cost_per_token": 0.00000286, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3.78e-6 + "output_cost_per_token": 0.00000378 }, "bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3.2e-7, @@ -4669,13 +4864,13 @@ "output_cost_per_token": 6.5e-7 }, "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 3.45e-6, + "input_cost_per_token": 0.00000345, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.55e-6 + "output_cost_per_token": 0.00000455 }, "bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3.9e-7, @@ -4697,13 +4892,13 @@ "supports_tool_choice": true }, "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": { - "input_cost_per_token": 1.04e-5, + "input_cost_per_token": 0.0000104, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3.12e-5, + "output_cost_per_token": 0.0000312, "supports_function_calling": true }, "bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1": { @@ -4717,7 +4912,7 @@ "supports_tool_choice": true }, "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": 
"bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, @@ -4726,20 +4921,20 @@ "notes": "Anthropic via Invoke route does not currently support pdf input." }, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true }, "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 4.45e-6, + "input_cost_per_token": 0.00000445, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5.88e-6 + "output_cost_per_token": 0.00000588 }, "bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 5e-7, @@ -4748,7 +4943,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.01e-6 + "output_cost_per_token": 0.00000101 }, "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { "input_cost_per_second": 0.011, @@ -4815,37 +5010,37 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, "supports_tool_choice": true }, "bedrock/us-east-1/anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/us-east-1/anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/us-east-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 
2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3.5e-6 + "output_cost_per_token": 0.0000035 }, "bedrock/us-east-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3e-7, @@ -4867,13 +5062,13 @@ "supports_tool_choice": true }, "bedrock/us-east-1/mistral.mistral-large-2402-v1:0": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1": { @@ -4893,7 +5088,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.84e-6, + "output_cost_per_token": 0.00000384, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -4906,7 +5101,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536 }, "bedrock/us-gov-east-1/amazon.titan-embed-text-v2:0": { @@ -4915,17 +5110,17 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024 }, "bedrock/us-gov-east-1/amazon.titan-text-express-v1": { - "input_cost_per_token": 1.3e-6, + "input_cost_per_token": 0.0000013, "litellm_provider": "bedrock", "max_input_tokens": 42000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1.7e-6 + "output_cost_per_token": 0.0000017 }, "bedrock/us-gov-east-1/amazon.titan-text-lite-v1": { "input_cost_per_token": 3e-7, @@ -4943,16 +5138,16 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 1.5e-6 + 
"output_cost_per_token": 0.0000015 }, "bedrock/us-gov-east-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3.6e-6, + "input_cost_per_token": 0.0000036, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.8e-5, + "output_cost_per_token": 0.000018, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -4966,7 +5161,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -4974,13 +5169,13 @@ "supports_vision": true }, "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": { - "input_cost_per_token": 3.3e-6, + "input_cost_per_token": 0.0000033, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -4992,13 +5187,13 @@ "supports_vision": true }, "bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 3.5e-6, + "output_cost_per_token": 0.0000035, "supports_pdf_input": true }, "bedrock/us-gov-east-1/meta.llama3-8b-instruct-v1:0": { @@ -5008,7 +5203,7 @@ "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 2.65e-6, + "output_cost_per_token": 0.00000265, "supports_pdf_input": true }, "bedrock/us-gov-west-1/amazon.nova-pro-v1:0": { @@ -5018,7 +5213,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - 
"output_cost_per_token": 3.84e-6, + "output_cost_per_token": 0.00000384, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -5031,7 +5226,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536 }, "bedrock/us-gov-west-1/amazon.titan-embed-text-v2:0": { @@ -5040,17 +5235,17 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024 }, "bedrock/us-gov-west-1/amazon.titan-text-express-v1": { - "input_cost_per_token": 1.3e-6, + "input_cost_per_token": 0.0000013, "litellm_provider": "bedrock", "max_input_tokens": 42000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1.7e-6 + "output_cost_per_token": 0.0000017 }, "bedrock/us-gov-west-1/amazon.titan-text-lite-v1": { "input_cost_per_token": 3e-7, @@ -5068,18 +5263,18 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "bedrock/us-gov-west-1/anthropic.claude-3-7-sonnet-20250219-v1:0": { - "cache_creation_input_token_cost": 4.5e-6, + "cache_creation_input_token_cost": 0.0000045, "cache_read_input_token_cost": 3.6e-7, - "input_cost_per_token": 3.6e-6, + "input_cost_per_token": 0.0000036, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.8e-5, + "output_cost_per_token": 0.000018, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -5091,13 +5286,13 @@ "supports_vision": true }, "bedrock/us-gov-west-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3.6e-6, + "input_cost_per_token": 0.0000036, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 
8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.8e-5, + "output_cost_per_token": 0.000018, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -5111,7 +5306,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -5119,13 +5314,13 @@ "supports_vision": true }, "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": { - "input_cost_per_token": 3.3e-6, + "input_cost_per_token": 0.0000033, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -5137,13 +5332,13 @@ "supports_vision": true }, "bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 3.5e-6, + "output_cost_per_token": 0.0000035, "supports_pdf_input": true }, "bedrock/us-gov-west-1/meta.llama3-8b-instruct-v1:0": { @@ -5153,17 +5348,17 @@ "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 2.65e-6, + "output_cost_per_token": 0.00000265, "supports_pdf_input": true }, "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3.5e-6 + "output_cost_per_token": 0.0000035 }, "bedrock/us-west-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3e-7, 
@@ -5239,27 +5434,27 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, "supports_tool_choice": true }, "bedrock/us-west-2/anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/us-west-2/anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": { @@ -5273,13 +5468,13 @@ "supports_tool_choice": true }, "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { @@ -5293,7 +5488,7 @@ "supports_tool_choice": true }, "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 8e-7, "litellm_provider": "bedrock", @@ -5301,7 +5496,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -5316,7 +5511,7 @@ "max_output_tokens": 128000, 
"max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "supports_function_calling": true, "supports_tool_choice": true }, @@ -5445,13 +5640,13 @@ "output_cost_per_token": 5e-7 }, "chatgpt-4o-latest": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -5461,8 +5656,8 @@ "supports_vision": true }, "claude-3-5-haiku-20241022": { - "cache_creation_input_token_cost": 1e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.000001, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 8e-8, "deprecation_date": "2025-10-01", "input_cost_per_token": 8e-7, @@ -5471,7 +5666,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5488,17 +5683,17 @@ "tool_use_system_prompt_tokens": 264 }, "claude-3-5-haiku-latest": { - "cache_creation_input_token_cost": 1.25e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 1e-7, "deprecation_date": "2025-10-01", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ 
-5515,16 +5710,16 @@ "tool_use_system_prompt_tokens": 264 }, "claude-haiku-4-5-20251001": { - "cache_creation_input_token_cost": 1.25e-6, - "cache_creation_input_token_cost_above_1hr": 2e-6, + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000002, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_computer_use": true, @@ -5536,16 +5731,16 @@ "supports_vision": true }, "claude-haiku-4-5": { - "cache_creation_input_token_cost": 1.25e-6, - "cache_creation_input_token_cost_above_1hr": 2e-6, + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000002, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_computer_use": true, @@ -5557,17 +5752,17 @@ "supports_vision": true }, "claude-3-5-sonnet-20240620": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-06-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 
0.000015, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -5578,17 +5773,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-3-5-sonnet-20241022": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-10-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5606,17 +5801,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-3-5-sonnet-latest": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-06-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5634,17 +5829,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-3-7-sonnet-20250219": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2026-02-19", - "input_cost_per_token": 3e-6, + 
"input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5663,17 +5858,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-3-7-sonnet-latest": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-06-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5692,7 +5887,7 @@ }, "claude-3-haiku-20240307": { "cache_creation_input_token_cost": 3e-7, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-8, "input_cost_per_token": 2.5e-7, "litellm_provider": "anthropic", @@ -5700,7 +5895,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -5710,17 +5905,17 @@ "tool_use_system_prompt_tokens": 264 }, "claude-3-opus-20240229": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 6e-6, - "cache_read_input_token_cost": 1.5e-6, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.000006, + 
"cache_read_input_token_cost": 0.0000015, "deprecation_date": "2026-05-01", - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -5730,17 +5925,17 @@ "tool_use_system_prompt_tokens": 395 }, "claude-3-opus-latest": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 6e-6, - "cache_read_input_token_cost": 1.5e-6, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.000006, + "cache_read_input_token_cost": 0.0000015, "deprecation_date": "2025-03-01", - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -5750,15 +5945,15 @@ "tool_use_system_prompt_tokens": 395 }, "claude-4-opus-20250514": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5776,19 +5971,19 @@ "tool_use_system_prompt_tokens": 159 }, "claude-4-sonnet-20250514": { - 
"cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, "litellm_provider": "anthropic", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_above_200k_tokens": 2.25e-5, + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5806,19 +6001,19 @@ "tool_use_system_prompt_tokens": 159 }, "claude-sonnet-4-5": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5836,19 +6031,19 @@ "tool_use_system_prompt_tokens": 346 }, "claude-sonnet-4-5-20250929": { - 
"cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5867,19 +6062,19 @@ "tool_use_system_prompt_tokens": 346 }, "claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -5892,16 +6087,16 @@ "tool_use_system_prompt_tokens": 159 }, "claude-opus-4-1": { - 
"cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 3e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5919,17 +6114,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-opus-4-1-20250805": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 3e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "deprecation_date": "2026-08-05", "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5947,17 +6142,71 @@ "tool_use_system_prompt_tokens": 159 }, "claude-opus-4-20250514": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 3e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "deprecation_date": "2026-05-14", "litellm_provider": "anthropic", "max_input_tokens": 200000, 
"max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-5-20251101": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5976,20 
+6225,20 @@ }, "claude-sonnet-4-20250514": { "deprecation_date": "2026-05-14", - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -6007,40 +6256,40 @@ "tool_use_system_prompt_tokens": 159 }, "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { - "input_cost_per_token": 1.923e-6, + "input_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "max_input_tokens": 3072, "max_output_tokens": 3072, "max_tokens": 3072, "mode": "chat", - "output_cost_per_token": 1.923e-6 + "output_cost_per_token": 0.000001923 }, "cloudflare/@cf/meta/llama-2-7b-chat-int8": { - "input_cost_per_token": 1.923e-6, + "input_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "max_input_tokens": 2048, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 1.923e-6 + "output_cost_per_token": 0.000001923 }, "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { - "input_cost_per_token": 1.923e-6, + "input_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "max_input_tokens": 8192, 
"max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.923e-6 + "output_cost_per_token": 0.000001923 }, "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { - "input_cost_per_token": 1.923e-6, + "input_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.923e-6 + "output_cost_per_token": 0.000001923 }, "code-bison": { "input_cost_per_character": 2.5e-7, @@ -6222,38 +6471,38 @@ "supports_tool_choice": true }, "codestral/codestral-2405": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "codestral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://docs.mistral.ai/capabilities/code_generation/", "supports_assistant_prefill": true, "supports_tool_choice": true }, "codestral/codestral-latest": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "codestral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://docs.mistral.ai/capabilities/code_generation/", "supports_assistant_prefill": true, "supports_tool_choice": true }, "codex-mini-latest": { "cache_read_input_token_cost": 3.75e-7, - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -6278,13 +6527,13 @@ "supports_tool_choice": true }, "cohere.command-r-plus-v1:0": { - "input_cost_per_token": 3e-6, + 
"input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_tool_choice": true }, "cohere.command-r-v1:0": { @@ -6294,17 +6543,17 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_tool_choice": true }, "cohere.command-text-v14": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "bedrock", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_tool_choice": true }, "cohere.embed-english-v3": { @@ -6313,7 +6562,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_embedding_image_input": true }, "cohere.embed-multilingual-v3": { @@ -6322,7 +6571,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_embedding_image_input": true }, "cohere.embed-v4:0": { @@ -6331,7 +6580,7 @@ "max_input_tokens": 128000, "max_tokens": 128000, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536, "supports_embedding_image_input": true }, @@ -6341,13 +6590,13 @@ "max_input_tokens": 128000, "max_tokens": 128000, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536, "supports_embedding_image_input": true }, "cohere.rerank-v3-5:0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "bedrock", "max_document_chunks_per_query": 100, "max_input_tokens": 32000, @@ -6356,25 +6605,25 @@ "max_tokens": 32000, 
"max_tokens_per_document_chunk": 512, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "command": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "command-a-03-2025": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "cohere_chat", "max_input_tokens": 256000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true }, @@ -6389,13 +6638,13 @@ "supports_tool_choice": true }, "command-nightly": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "command-r": { "input_cost_per_token": 1.5e-7, @@ -6420,24 +6669,24 @@ "supports_tool_choice": true }, "command-r-plus": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "cohere_chat", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true }, "command-r-plus-08-2024": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "cohere_chat", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true }, @@ -6454,13 +6703,13 @@ "supports_tool_choice": true }, "computer-use-preview": { - 
"input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -6481,7 +6730,7 @@ "max_output_tokens": 8192, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.7e-6, + "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", "supported_endpoints": ["/v1/chat/completions"], "supports_function_calling": true, @@ -6500,7 +6749,7 @@ "max_output_tokens": 65536, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.7e-6, + "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", "supported_endpoints": ["/v1/chat/completions"], "supports_function_calling": false, @@ -6519,7 +6768,7 @@ "max_output_tokens": 16384, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6539,12 +6788,12 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [0, 256000.0] + "range": [0, 256000] }, { "input_cost_per_token": 2.5e-7, - "output_cost_per_token": 2e-6, - "range": [256000.0, 1000000.0] + "output_cost_per_token": 0.000002, + "range": [256000, 1000000] } ] }, @@ -6562,23 +6811,23 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [0, 256000.0] + "range": [0, 256000] }, { "input_cost_per_token": 2.5e-7, - "output_cost_per_token": 2e-6, - "range": [256000.0, 1000000.0] + "output_cost_per_token": 0.000002, + "range": [256000, 1000000] } ] }, "dashscope/qwen-max": { - "input_cost_per_token": 1.6e-6, + "input_cost_per_token": 
0.0000016, "litellm_provider": "dashscope", "max_input_tokens": 30720, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6.4e-6, + "output_cost_per_token": 0.0000064, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6591,7 +6840,7 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6604,7 +6853,7 @@ "max_output_tokens": 8192, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6617,8 +6866,8 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6631,8 +6880,8 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6651,15 +6900,15 @@ "tiered_pricing": [ { "input_cost_per_token": 4e-7, - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, - "range": [0, 256000.0] + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, + "range": [0, 256000] }, { - 
"input_cost_per_token": 1.2e-6, - "output_cost_per_reasoning_token": 1.2e-5, - "output_cost_per_token": 3.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000012, + "output_cost_per_reasoning_token": 0.000012, + "output_cost_per_token": 0.0000036, + "range": [256000, 1000000] } ] }, @@ -6676,15 +6925,15 @@ "tiered_pricing": [ { "input_cost_per_token": 4e-7, - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, - "range": [0, 256000.0] + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, + "range": [0, 256000] }, { - "input_cost_per_token": 1.2e-6, - "output_cost_per_reasoning_token": 1.2e-5, - "output_cost_per_token": 3.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000012, + "output_cost_per_reasoning_token": 0.000012, + "output_cost_per_token": 0.0000036, + "range": [256000, 1000000] } ] }, @@ -6701,15 +6950,15 @@ "tiered_pricing": [ { "input_cost_per_token": 4e-7, - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, - "range": [0, 256000.0] + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, + "range": [0, 256000] }, { - "input_cost_per_token": 1.2e-6, - "output_cost_per_reasoning_token": 1.2e-5, - "output_cost_per_token": 3.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000012, + "output_cost_per_reasoning_token": 0.000012, + "output_cost_per_token": 0.0000036, + "range": [256000, 1000000] } ] }, @@ -6793,26 +7042,26 @@ { "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 3e-7, - "output_cost_per_token": 1.5e-6, - "range": [0, 32000.0] + "output_cost_per_token": 0.0000015, + "range": [0, 32000] }, { "cache_read_input_token_cost": 1.2e-7, "input_cost_per_token": 5e-7, - "output_cost_per_token": 2.5e-6, - "range": [32000.0, 128000.0] + "output_cost_per_token": 0.0000025, + "range": [32000, 128000] }, { "cache_read_input_token_cost": 2e-7, "input_cost_per_token": 8e-7, - 
"output_cost_per_token": 4e-6, - "range": [128000.0, 256000.0] + "output_cost_per_token": 0.000004, + "range": [128000, 256000] }, { "cache_read_input_token_cost": 4e-7, - "input_cost_per_token": 1.6e-6, - "output_cost_per_token": 9.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000016, + "output_cost_per_token": 0.0000096, + "range": [256000, 1000000] } ] }, @@ -6829,23 +7078,23 @@ "tiered_pricing": [ { "input_cost_per_token": 3e-7, - "output_cost_per_token": 1.5e-6, - "range": [0, 32000.0] + "output_cost_per_token": 0.0000015, + "range": [0, 32000] }, { "input_cost_per_token": 5e-7, - "output_cost_per_token": 2.5e-6, - "range": [32000.0, 128000.0] + "output_cost_per_token": 0.0000025, + "range": [32000, 128000] }, { "input_cost_per_token": 8e-7, - "output_cost_per_token": 4e-6, - "range": [128000.0, 256000.0] + "output_cost_per_token": 0.000004, + "range": [128000, 256000] }, { - "input_cost_per_token": 1.6e-6, - "output_cost_per_token": 9.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000016, + "output_cost_per_token": 0.0000096, + "range": [256000, 1000000] } ] }, @@ -6862,27 +7111,27 @@ "tiered_pricing": [ { "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, - "output_cost_per_token": 5e-6, - "range": [0, 32000.0] + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "range": [0, 32000] }, { "cache_read_input_token_cost": 1.8e-7, - "input_cost_per_token": 1.8e-6, - "output_cost_per_token": 9e-6, - "range": [32000.0, 128000.0] + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.000009, + "range": [32000, 128000] }, { "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "output_cost_per_token": 1.5e-5, - "range": [128000.0, 256000.0] + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [128000, 256000] }, { "cache_read_input_token_cost": 6e-7, - "input_cost_per_token": 6e-6, - "output_cost_per_token": 6e-5, - "range": 
[256000.0, 1000000.0] + "input_cost_per_token": 0.000006, + "output_cost_per_token": 0.00006, + "range": [256000, 1000000] } ] }, @@ -6898,24 +7147,24 @@ "supports_tool_choice": true, "tiered_pricing": [ { - "input_cost_per_token": 1e-6, - "output_cost_per_token": 5e-6, - "range": [0, 32000.0] + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "range": [0, 32000] }, { - "input_cost_per_token": 1.8e-6, - "output_cost_per_token": 9e-6, - "range": [32000.0, 128000.0] + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.000009, + "range": [32000, 128000] }, { - "input_cost_per_token": 3e-6, - "output_cost_per_token": 1.5e-5, - "range": [128000.0, 256000.0] + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [128000, 256000] }, { - "input_cost_per_token": 6e-6, - "output_cost_per_token": 6e-5, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.000006, + "output_cost_per_token": 0.00006, + "range": [256000, 1000000] } ] }, @@ -6931,19 +7180,19 @@ "supports_tool_choice": true, "tiered_pricing": [ { - "input_cost_per_token": 1.2e-6, - "output_cost_per_token": 6e-6, - "range": [0, 32000.0] + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.000006, + "range": [0, 32000] }, { - "input_cost_per_token": 2.4e-6, - "output_cost_per_token": 1.2e-5, - "range": [32000.0, 128000.0] + "input_cost_per_token": 0.0000024, + "output_cost_per_token": 0.000012, + "range": [32000, 128000] }, { - "input_cost_per_token": 3e-6, - "output_cost_per_token": 1.5e-5, - "range": [128000.0, 252000.0] + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [128000, 252000] } ] }, @@ -6954,7 +7203,7 @@ "max_output_tokens": 8192, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6962,7 
+7211,7 @@ }, "databricks/databricks-bge-large-en": { "input_cost_per_token": 1.0003e-7, - "input_dbu_cost_per_token": 1.429e-6, + "input_dbu_cost_per_token": 0.000001429, "litellm_provider": "databricks", "max_input_tokens": 512, "max_tokens": 512, @@ -6970,49 +7219,321 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "embedding", - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, "output_vector_size": 1024, "source": "https://www.databricks.com/product/pricing/foundation-model-serving" }, "databricks/databricks-claude-3-7-sonnet": { - "input_cost_per_token": 2.5e-6, - "input_dbu_cost_per_token": 3.571e-5, + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, "litellm_provider": "databricks", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 200000, "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 1.7857e-5, - "output_db_cost_per_token": 0.000214286, - "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", "supports_assistant_prefill": true, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true }, - "databricks/databricks-gte-large-en": { - "input_cost_per_token": 1.2999e-7, - "input_dbu_cost_per_token": 1.857e-6, + "databricks/databricks-claude-haiku-4-5": { + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, "litellm_provider": "databricks", - "max_input_tokens": 8192, - "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, - "mode": "embedding", - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, - "output_vector_size": 1024, - "source": "https://www.databricks.com/product/pricing/foundation-model-serving" - }, - "databricks/databricks-llama-2-70b-chat": { - "input_cost_per_token": 5.0001e-7, - "input_dbu_cost_per_token": 7.143e-6, - "litellm_provider": "databricks", + "mode": "chat", + "output_cost_per_token": 0.00000500003, + "output_dbu_cost_per_token": 0.000071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4": { + "input_cost_per_token": 0.000015000020000000002, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.00007500003000000001, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-1": { + "input_cost_per_token": 0.000015000020000000002, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.00007500003000000001, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-5": { + "input_cost_per_token": 0.00000500003, + "input_dbu_cost_per_token": 0.000071429, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.000025000010000000002, + "output_dbu_cost_per_token": 0.000357143, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4": { + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-1": { + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-5": { + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-flash": { + "input_cost_per_token": 3.0001999999999996e-7, + "input_dbu_cost_per_token": 0.000004285999999999999, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 1048576, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.00000249998, + "output_dbu_cost_per_token": 0.000035714, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-pro": { + "input_cost_per_token": 0.00000124999, + "input_dbu_cost_per_token": 0.000017857, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 1048576, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.000009999990000000002, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemma-3-12b": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 0.0000021429999999999996, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 5.0001e-7, + "output_dbu_cost_per_token": 0.000007143, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-5": { + "input_cost_per_token": 0.00000124999, + "input_dbu_cost_per_token": 0.000017857, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.000009999990000000002, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-1": { + "input_cost_per_token": 0.00000124999, + "input_dbu_cost_per_token": 0.000017857, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.000009999990000000002, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-mini": { + "input_cost_per_token": 2.4997000000000006e-7, + "input_dbu_cost_per_token": 0.000003571, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.0000019999700000000004, + "output_dbu_cost_per_token": 0.000028571, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-nano": { + "input_cost_per_token": 4.998e-8, + "input_dbu_cost_per_token": 7.14e-7, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 3.9998000000000007e-7, + "output_dbu_cost_per_token": 0.000005714000000000001, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-oss-120b": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 0.0000021429999999999996, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 5.9997e-7, + "output_dbu_cost_per_token": 0.000008571, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-oss-20b": { + "input_cost_per_token": 7e-8, + "input_dbu_cost_per_token": 0.000001, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 3.0001999999999996e-7, + "output_dbu_cost_per_token": 0.000004285999999999999, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gte-large-en": { + "input_cost_per_token": 1.2999000000000001e-7, + "input_dbu_cost_per_token": 0.000001857, + "litellm_provider": "databricks", + "max_input_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "embedding", + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, + "output_vector_size": 1024, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-llama-2-70b-chat": { + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 0.000007143, + "litellm_provider": "databricks", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, @@ -7020,14 +7541,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 1.5e-6, - "output_dbu_cost_per_token": 2.1429e-5, + "output_cost_per_token": 0.0000015000300000000002, + "output_dbu_cost_per_token": 0.000021429, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-llama-4-maverick": { - "input_cost_per_token": 5e-6, - "input_dbu_cost_per_token": 7.143e-5, + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7036,14 +7557,14 @@ "notes": "Databricks documentation now provides both DBU costs (_dbu_cost_per_token) and dollar costs(_cost_per_token)." }, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_dbu_cost_per_token": 0.00021429, + "output_cost_per_token": 0.0000015000300000000002, + "output_dbu_cost_per_token": 0.000021429, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-meta-llama-3-1-405b-instruct": { - "input_cost_per_token": 5e-6, - "input_dbu_cost_per_token": 7.1429e-5, + "input_cost_per_token": 0.00000500003, + "input_dbu_cost_per_token": 0.000071429, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7052,14 +7573,29 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 1.500002e-5, - "output_db_cost_per_token": 0.000214286, + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, + "databricks/databricks-meta-llama-3-1-8b-instruct": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 0.0000021429999999999996, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 4.5003000000000007e-7, + "output_dbu_cost_per_token": 0.000006429000000000001, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, "databricks/databricks-meta-llama-3-3-70b-instruct": { - "input_cost_per_token": 1.00002e-6, - "input_dbu_cost_per_token": 1.4286e-5, + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7068,14 +7604,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 2.99999e-6, - "output_dbu_cost_per_token": 4.2857e-5, + "output_cost_per_token": 0.0000015000300000000002, + "output_dbu_cost_per_token": 0.000021429, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-meta-llama-3-70b-instruct": { - "input_cost_per_token": 1.00002e-6, - "input_dbu_cost_per_token": 1.4286e-5, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7084,14 +7620,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 2.99999e-6, - "output_dbu_cost_per_token": 4.2857e-5, + "output_cost_per_token": 0.0000029999900000000002, + "output_dbu_cost_per_token": 0.000042857, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-mixtral-8x7b-instruct": { "input_cost_per_token": 5.0001e-7, - "input_dbu_cost_per_token": 7.143e-6, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 4096, "max_output_tokens": 4096, @@ -7100,14 +7636,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 9.9902e-7, - "output_dbu_cost_per_token": 1.4286e-5, + "output_cost_per_token": 0.00000100002, + "output_dbu_cost_per_token": 0.000014286, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-mpt-30b-instruct": { - "input_cost_per_token": 9.9902e-7, - "input_dbu_cost_per_token": 1.4286e-5, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, "litellm_provider": "databricks", "max_input_tokens": 8192, "max_output_tokens": 8192, @@ -7116,14 +7652,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 9.9902e-7, - "output_dbu_cost_per_token": 1.4286e-5, + "output_cost_per_token": 0.00000100002, + "output_dbu_cost_per_token": 0.000014286, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-mpt-7b-instruct": { "input_cost_per_token": 5.0001e-7, - "input_dbu_cost_per_token": 7.143e-6, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 8192, "max_output_tokens": 8192, @@ -7132,8 +7668,8 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, @@ -7143,13 +7679,13 @@ "mode": "search" }, "davinci-002": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", "max_input_tokens": 16384, "max_output_tokens": 4096, "max_tokens": 16384, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "deepgram/base": { "input_cost_per_second": 0.00020833, @@ -7159,7 +7695,7 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7171,7 +7707,7 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7183,7 +7719,7 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7195,7 +7731,7 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7207,7 +7743,7 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7219,7 +7755,7 @@ "original_pricing_per_minute": 0.0125 
}, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7231,7 +7767,7 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7243,7 +7779,7 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7255,7 +7791,7 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7267,7 +7803,7 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7279,7 +7815,7 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7291,7 +7827,7 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7303,211 +7839,211 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, 
"deepgram/nova": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-atc": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-automotive": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-conversationalai": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", 
"original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-drivethru": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-finance": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-general": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-meeting": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": 
["/v1/audio/transcriptions"] }, "deepgram/nova-2-phonecall": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-video": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-voicemail": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3-general": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = 
$0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3-medical": { - "input_cost_per_second": 8.667e-5, + "input_cost_per_second": 0.00008667, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0052/60 seconds = $0.00008667 per second (multilingual)", "original_pricing_per_minute": 0.0052 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-general": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-phonecall": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7518,7 +8054,7 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7529,7 +8065,7 @@ "notes": "Deepgram's hosted OpenAI Whisper models - 
pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7540,7 +8076,7 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7551,7 +8087,7 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7562,7 +8098,7 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7573,7 +8109,7 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -7591,8 +8127,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1e-6, - "output_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7683,7 +8219,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 3e-7, - "output_cost_per_token": 2.9e-6, + "output_cost_per_token": 
0.0000029, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7713,7 +8249,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 4e-7, - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7723,7 +8259,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 2.9e-7, - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7733,7 +8269,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 1.4e-7, - "output_cost_per_token": 1.4e-6, + "output_cost_per_token": 0.0000014, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7743,7 +8279,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 1.4e-7, - "output_cost_per_token": 1.4e-6, + "output_cost_per_token": 0.0000014, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7783,7 +8319,7 @@ "max_input_tokens": 16384, "max_output_tokens": 16384, "input_cost_per_token": 2.7e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": false @@ -7792,8 +8328,8 @@ "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "input_cost_per_token": 3.3e-6, - "output_cost_per_token": 1.65e-5, + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, "cache_read_input_token_cost": 3.3e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -7803,8 +8339,8 @@ "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "input_cost_per_token": 1.65e-5, - "output_cost_per_token": 8.25e-5, + "input_cost_per_token": 0.0000165, + "output_cost_per_token": 0.0000825, "litellm_provider": 
"deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7813,8 +8349,8 @@ "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "input_cost_per_token": 3.3e-6, - "output_cost_per_token": 1.65e-5, + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7824,7 +8360,7 @@ "max_input_tokens": 163840, "max_output_tokens": 163840, "input_cost_per_token": 7e-7, - "output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7834,7 +8370,7 @@ "max_input_tokens": 163840, "max_output_tokens": 163840, "input_cost_per_token": 5e-7, - "output_cost_per_token": 2.15e-6, + "output_cost_per_token": 0.00000215, "cache_read_input_token_cost": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -7844,8 +8380,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1e-6, - "output_cost_per_token": 3e-6, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7874,8 +8410,8 @@ "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "input_cost_per_token": 1e-6, - "output_cost_per_token": 3e-6, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7905,7 +8441,7 @@ "max_input_tokens": 163840, "max_output_tokens": 163840, "input_cost_per_token": 2.7e-7, - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "cache_read_input_token_cost": 2.16e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -7917,7 +8453,7 @@ "max_input_tokens": 163840, "max_output_tokens": 163840, "input_cost_per_token": 2.7e-7, - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, 
"cache_read_input_token_cost": 2.16e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -7938,7 +8474,7 @@ "max_input_tokens": 1000000, "max_output_tokens": 1000000, "input_cost_per_token": 3e-7, - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7947,8 +8483,8 @@ "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "input_cost_per_token": 1.25e-6, - "output_cost_per_token": 1e-5, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -8178,7 +8714,7 @@ "max_input_tokens": 131072, "max_output_tokens": 131072, "input_cost_per_token": 5e-7, - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -8188,7 +8724,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 5e-7, - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "cache_read_input_token_cost": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -8249,13 +8785,13 @@ "max_input_tokens": 131072, "max_output_tokens": 131072, "input_cost_per_token": 4e-7, - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true }, "deepseek/deepseek-chat": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 7e-8, "input_cost_per_token": 2.7e-7, "input_cost_per_token_cache_hit": 7e-8, @@ -8264,7 +8800,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.1e-6, + "output_cost_per_token": 0.0000011, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -8292,7 +8828,7 @@ "max_output_tokens": 8192, 
"max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -8307,7 +8843,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -8315,7 +8851,7 @@ "supports_tool_choice": true }, "deepseek/deepseek-v3": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 7e-8, "input_cost_per_token": 2.7e-7, "input_cost_per_token_cache_hit": 7e-8, @@ -8324,10 +8860,25 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.1e-6, + "output_cost_per_token": 0.0000011, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-v3.2": { + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, + "litellm_provider": "deepseek", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-7, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_tool_choice": true }, "deepseek.v3-v1:0": { @@ -8337,7 +8888,7 @@ "max_output_tokens": 81920, "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 1.68e-6, + "output_cost_per_token": 0.00000168, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true @@ -8352,7 +8903,7 @@ "output_cost_per_token": 5e-7 }, "doubao-embedding": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ 
-8360,11 +8911,11 @@ "notes": "Volcengine Doubao embedding model - standard version with 2560 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 2560 }, "doubao-embedding-large": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8372,11 +8923,11 @@ "notes": "Volcengine Doubao embedding model - large version with 2048 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 2048 }, "doubao-embedding-large-text-240915": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8384,11 +8935,11 @@ "notes": "Volcengine Doubao embedding model - text-240915 version with 4096 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 4096 }, "doubao-embedding-large-text-250515": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8396,11 +8947,11 @@ "notes": "Volcengine Doubao embedding model - text-250515 version with 2048 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 2048 }, "doubao-embedding-text-240715": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8408,7 +8959,7 @@ "notes": "Volcengine Doubao embedding model - text-240715 version with 2560 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 2560 }, "exa_ai/search": { @@ -8416,11 +8967,11 @@ "mode": "search", "tiered_pricing": [ { - "input_cost_per_query": 5e-3, + "input_cost_per_query": 0.005, "max_results_range": 
[0, 25] }, { - "input_cost_per_query": 25e-3, + "input_cost_per_query": 0.025, "max_results_range": [26, 100] } ] @@ -8430,43 +8981,43 @@ "mode": "search", "tiered_pricing": [ { - "input_cost_per_query": 1.66e-3, + "input_cost_per_query": 0.00166, "max_results_range": [1, 10] }, { - "input_cost_per_query": 3.32e-3, + "input_cost_per_query": 0.00332, "max_results_range": [11, 20] }, { - "input_cost_per_query": 4.98e-3, + "input_cost_per_query": 0.00498, "max_results_range": [21, 30] }, { - "input_cost_per_query": 6.64e-3, + "input_cost_per_query": 0.00664, "max_results_range": [31, 40] }, { - "input_cost_per_query": 8.3e-3, + "input_cost_per_query": 0.0083, "max_results_range": [41, 50] }, { - "input_cost_per_query": 9.96e-3, + "input_cost_per_query": 0.00996, "max_results_range": [51, 60] }, { - "input_cost_per_query": 11.62e-3, + "input_cost_per_query": 0.01162, "max_results_range": [61, 70] }, { - "input_cost_per_query": 13.28e-3, + "input_cost_per_query": 0.01328, "max_results_range": [71, 80] }, { - "input_cost_per_query": 14.94e-3, + "input_cost_per_query": 0.01494, "max_results_range": [81, 90] }, { - "input_cost_per_query": 16.6e-3, + "input_cost_per_query": 0.0166, "max_results_range": [91, 100] } ], @@ -8475,20 +9026,20 @@ } }, "perplexity/search": { - "input_cost_per_query": 5e-3, + "input_cost_per_query": 0.005, "litellm_provider": "perplexity", "mode": "search" }, "searxng/search": { "litellm_provider": "searxng", "mode": "search", - "input_cost_per_query": 0.0, + "input_cost_per_query": 0, "metadata": { "notes": "SearXNG is an open-source metasearch engine. Free to use when self-hosted or using public instances." 
} }, "elevenlabs/scribe_v1": { - "input_cost_per_second": 6.11e-5, + "input_cost_per_second": 0.0000611, "litellm_provider": "elevenlabs", "metadata": { "calculation": "$0.22/hour = $0.00366/minute = $0.0000611 per second (enterprise pricing)", @@ -8496,12 +9047,12 @@ "original_pricing_per_hour": 0.22 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, "elevenlabs/scribe_v1_experimental": { - "input_cost_per_second": 6.11e-5, + "input_cost_per_second": 0.0000611, "litellm_provider": "elevenlabs", "metadata": { "calculation": "$0.22/hour = $0.00366/minute = $0.0000611 per second (enterprise pricing)", @@ -8509,7 +9060,7 @@ "original_pricing_per_hour": 0.22 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, @@ -8519,7 +9070,7 @@ "max_input_tokens": 1024, "max_tokens": 1024, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "embed-english-light-v3.0": { "input_cost_per_token": 1e-7, @@ -8527,7 +9078,7 @@ "max_input_tokens": 1024, "max_tokens": 1024, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "embed-english-v2.0": { "input_cost_per_token": 1e-7, @@ -8535,7 +9086,7 @@ "max_input_tokens": 4096, "max_tokens": 4096, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "embed-english-v3.0": { "input_cost_per_image": 0.0001, @@ -8547,7 +9098,7 @@ "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." 
}, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_embedding_image_input": true, "supports_image_input": true }, @@ -8557,7 +9108,7 @@ "max_input_tokens": 768, "max_tokens": 768, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "embed-multilingual-v3.0": { "input_cost_per_token": 1e-7, @@ -8565,7 +9116,16 @@ "max_input_tokens": 1024, "max_tokens": 1024, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, + "supports_embedding_image_input": true + }, + "embed-multilingual-light-v3.0": { + "input_cost_per_token": 0.0001, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0, "supports_embedding_image_input": true }, "eu.amazon.nova-lite-v1:0": { @@ -8595,13 +9155,13 @@ "supports_response_schema": true }, "eu.amazon.nova-pro-v1:0": { - "input_cost_per_token": 1.05e-6, + "input_cost_per_token": 0.00000105, "litellm_provider": "bedrock_converse", "max_input_tokens": 300000, "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 4.2e-6, + "output_cost_per_token": 0.0000042, "source": "https://aws.amazon.com/bedrock/pricing/", "supports_function_calling": true, "supports_pdf_input": true, @@ -8616,7 +9176,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -8625,16 +9185,16 @@ "supports_tool_choice": true }, "eu.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "deprecation_date": "2026-10-15", "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, 
"max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -8648,13 +9208,13 @@ "tool_use_system_prompt_tokens": 346 }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -8662,13 +9222,13 @@ "supports_vision": true }, "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -8679,13 +9239,13 @@ "supports_vision": true }, "eu.anthropic.claude-3-7-sonnet-20250219-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -8703,7 +9263,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -8711,26 +9271,26 @@ 
"supports_vision": true }, "eu.anthropic.claude-3-opus-20240229-v1:0": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true }, "eu.anthropic.claude-3-sonnet-20240229-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -8738,15 +9298,15 @@ "supports_vision": true }, "eu.anthropic.claude-opus-4-1-20250805-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -8764,15 +9324,15 @@ "tool_use_system_prompt_tokens": 159 }, "eu.anthropic.claude-opus-4-20250514-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 
32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -8790,19 +9350,19 @@ "tool_use_system_prompt_tokens": 159 }, "eu.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -8820,19 +9380,19 @@ "tool_use_system_prompt_tokens": 159 }, "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 4.125e-6, + "cache_creation_input_token_cost": 0.000004125, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 3.3e-6, - "input_cost_per_token_above_200k_tokens": 6.6e-6, - "output_cost_per_token_above_200k_tokens": 2.475e-5, - "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", 
"max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -8872,13 +9432,13 @@ "supports_tool_choice": false }, "eu.mistral.pixtral-large-2502-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": false }, @@ -8974,9 +9534,9 @@ "output_cost_per_token": 2e-7 }, "fireworks-ai-56b-to-176b": { - "input_cost_per_token": 1.2e-6, + "input_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", - "output_cost_per_token": 1.2e-6 + "output_cost_per_token": 0.0000012 }, "fireworks-ai-above-16b": { "input_cost_per_token": 9e-7, @@ -8984,19 +9544,19 @@ "output_cost_per_token": 9e-7 }, "fireworks-ai-default": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "fireworks_ai", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "fireworks-ai-embedding-150m-to-350m": { "input_cost_per_token": 1.6e-8, "litellm_provider": "fireworks_ai-embedding-models", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "fireworks-ai-embedding-up-to-150m": { "input_cost_per_token": 8e-9, "litellm_provider": "fireworks_ai-embedding-models", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "fireworks-ai-moe-up-to-56b": { "input_cost_per_token": 5e-7, @@ -9014,42 +9574,42 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { - "input_cost_per_token": 
1.2e-6, + "input_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", "max_input_tokens": 65536, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://fireworks.ai/pricing", "supports_function_calling": false, "supports_response_schema": true, "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/deepseek-r1": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", "max_input_tokens": 128000, "max_output_tokens": 20480, "max_tokens": 20480, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/deepseek-r1-0528": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", "max_input_tokens": 160000, "max_output_tokens": 160000, "max_tokens": 160000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": false @@ -9061,7 +9621,7 @@ "max_output_tokens": 20480, "max_tokens": 20480, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": false @@ -9097,7 +9657,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.68e-6, + "output_cost_per_token": 0.00000168, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": true @@ -9109,11 +9669,24 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.68e-6, + "output_cost_per_token": 0.00000168, "source": "https://fireworks.ai/pricing", 
"supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3p2": { + "input_cost_per_token": 0.0000012, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "source": "https://fireworks.ai/models/fireworks/deepseek-v3p2", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/firefunction-v2": { "input_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai", @@ -9134,7 +9707,7 @@ "max_output_tokens": 96000, "max_tokens": 96000, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "source": "https://fireworks.ai/models/fireworks/glm-4p5", "supports_function_calling": true, "supports_response_schema": true, @@ -9153,6 +9726,19 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/glm-4p6": { + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 0.00000219, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": { "input_cost_per_token": 1.5e-7, "litellm_provider": "fireworks_ai", @@ -9186,12 +9772,25 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "source": "https://fireworks.ai/models/fireworks/kimi-k2-instruct", "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905": { + "input_cost_per_token": 
6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://app.fireworks.ai/models/fireworks/kimi-k2-instruct-0905", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/kimi-k2-thinking": { "input_cost_per_token": 6e-7, "litellm_provider": "fireworks_ai", @@ -9199,7 +9798,7 @@ "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "source": "https://fireworks.ai/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -9207,13 +9806,13 @@ "supports_web_search": true }, "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://fireworks.ai/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -9310,13 +9909,13 @@ "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { - "input_cost_per_token": 1.2e-6, + "input_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", "max_input_tokens": 65536, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://fireworks.ai/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -9349,13 +9948,13 @@ "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/yi-large": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", 
"max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://fireworks.ai/pricing", "supports_function_calling": false, "supports_response_schema": true, @@ -9367,7 +9966,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { @@ -9376,7 +9975,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/thenlper/gte-base": { @@ -9385,7 +9984,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/thenlper/gte-large": { @@ -9394,7 +9993,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "friendliai/meta-llama-3.1-70b-instruct": { @@ -9426,121 +10025,122 @@ "supports_tool_choice": true }, "ft:babbage-002": { - "input_cost_per_token": 4e-7, + "input_cost_per_token": 0.0000016, "input_cost_per_token_batches": 2e-7, "litellm_provider": "text-completion-openai", "max_input_tokens": 16384, "max_output_tokens": 4096, "max_tokens": 16384, "mode": "completion", - "output_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 2e-7 }, "ft:davinci-002": { - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000012, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "text-completion-openai", "max_input_tokens": 16384, "max_output_tokens": 4096, "max_tokens": 16384, "mode": "completion", - "output_cost_per_token": 
2e-6, - "output_cost_per_token_batches": 1e-6 + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000001 }, "ft:gpt-3.5-turbo": { - "input_cost_per_token": 3e-6, - "input_cost_per_token_batches": 1.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_batches": 0.0000015, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, - "output_cost_per_token_batches": 3e-6, + "output_cost_per_token": 0.000006, + "output_cost_per_token_batches": 0.000003, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-3.5-turbo-0125": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-3.5-turbo-0613": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-3.5-turbo-1106": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-4-0613": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "source": "OpenAI needs 
to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing", "supports_function_calling": true, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-4o-2024-08-06": { - "input_cost_per_token": 3.75e-6, - "input_cost_per_token_batches": 1.875e-6, + "cache_read_input_token_cost": 0.000001875, + "input_cost_per_token": 0.00000375, + "input_cost_per_token_batches": 0.000001875, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_batches": 7.5e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_batches": 0.0000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, + "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true }, "ft:gpt-4o-2024-11-20": { - "cache_creation_input_token_cost": 1.875e-6, - "input_cost_per_token": 3.75e-6, + "cache_creation_input_token_cost": 0.000001875, + "input_cost_per_token": 0.00000375, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true + "supports_tool_choice": true }, "ft:gpt-4o-mini-2024-07-18": { "cache_read_input_token_cost": 1.5e-7, @@ -9551,7 +10151,7 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "output_cost_per_token_batches": 6e-7, "supports_function_calling": true, 
"supports_parallel_function_calling": true, @@ -9559,8 +10159,79 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true + "supports_tool_choice": true + }, + "ft:gpt-4.1-2025-04-14": { + "cache_read_input_token_cost": 7.5e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_batches": 0.0000015, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000006, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-mini-2025-04-14": { + "cache_read_input_token_cost": 2e-7, + "input_cost_per_token": 8e-7, + "input_cost_per_token_batches": 4e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0000032, + "output_cost_per_token_batches": 0.0000016, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-nano-2025-04-14": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_batches": 1e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-7, + "output_cost_per_token_batches": 4e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + 
"supports_tool_choice": true + }, + "ft:o4-mini-2025-04-16": { + "cache_read_input_token_cost": 0.000001, + "input_cost_per_token": 0.000004, + "input_cost_per_token_batches": 0.000002, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 0.000016, + "output_cost_per_token_batches": 0.000008, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true }, "gemini-1.0-pro": { "input_cost_per_character": 1.25e-7, @@ -9573,7 +10244,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9591,7 +10262,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9609,7 +10280,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9626,7 +10297,7 @@ "max_video_length": 2, "max_videos_per_prompt": 1, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": 
true, "supports_parallel_function_calling": true, @@ -9645,7 +10316,7 @@ "max_video_length": 2, "max_videos_per_prompt": 1, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9663,7 +10334,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9680,23 +10351,23 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. 
Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "gemini-1.5-flash": { - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9722,16 +10393,16 @@ }, "gemini-1.5-flash-001": { "deprecation_date": "2025-05-24", - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "input_cost_per_token_above_128k_tokens": 0.000001, + 
"input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9757,16 +10428,16 @@ }, "gemini-1.5-flash-002": { "deprecation_date": "2025-09-24", - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9791,16 +10462,16 @@ "supports_vision": true }, "gemini-1.5-flash-exp-0827": { - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 4.688e-9, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + 
"input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9825,16 +10496,16 @@ "supports_vision": true }, "gemini-1.5-flash-preview-0514": { - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9858,14 +10529,14 @@ "supports_vision": true }, "gemini-1.5-pro": { - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, "input_cost_per_image_above_128k_tokens": 0.0006575, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_128k_tokens": 0.0000025, "input_cost_per_video_per_second": 0.00032875, 
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "litellm_provider": "vertex_ai-language-models", @@ -9873,10 +10544,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, - "output_cost_per_token": 5e-6, - "output_cost_per_token_above_128k_tokens": 1e-5, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "output_cost_per_token": 0.000005, + "output_cost_per_token_above_128k_tokens": 0.00001, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9888,14 +10559,14 @@ }, "gemini-1.5-pro-001": { "deprecation_date": "2025-05-24", - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, "input_cost_per_image_above_128k_tokens": 0.0006575, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_128k_tokens": 0.0000025, "input_cost_per_video_per_second": 0.00032875, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "litellm_provider": "vertex_ai-language-models", @@ -9903,10 +10574,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, - "output_cost_per_token": 5e-6, - "output_cost_per_token_above_128k_tokens": 1e-5, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "output_cost_per_token": 0.000005, + 
"output_cost_per_token_above_128k_tokens": 0.00001, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9917,14 +10588,14 @@ }, "gemini-1.5-pro-002": { "deprecation_date": "2025-09-24", - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, "input_cost_per_image_above_128k_tokens": 0.0006575, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_128k_tokens": 0.0000025, "input_cost_per_video_per_second": 0.00032875, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "litellm_provider": "vertex_ai-language-models", @@ -9932,10 +10603,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, - "output_cost_per_token": 5e-6, - "output_cost_per_token_above_128k_tokens": 1e-5, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "output_cost_per_token": 0.000005, + "output_cost_per_token_above_128k_tokens": 0.00001, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9945,8 +10616,8 @@ "supports_vision": true }, "gemini-1.5-pro-preview-0215": { - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 
0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, @@ -9960,8 +10631,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, "output_cost_per_token": 3.125e-7, "output_cost_per_token_above_128k_tokens": 6.25e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -9972,8 +10643,8 @@ "supports_tool_choice": true }, "gemini-1.5-pro-preview-0409": { - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, @@ -9987,8 +10658,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, "output_cost_per_token": 3.125e-7, "output_cost_per_token_above_128k_tokens": 6.25e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -9998,8 +10669,8 @@ "supports_tool_choice": true }, "gemini-1.5-pro-preview-0514": { - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, @@ -10013,8 +10684,8 @@ 
"max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, "output_cost_per_token": 3.125e-7, "output_cost_per_token_above_128k_tokens": 6.25e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -10058,7 +10729,7 @@ "gemini-2.0-flash-001": { "cache_read_input_token_cost": 3.75e-8, "deprecation_date": "2026-02-05", - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10184,10 +10855,10 @@ }, "gemini-2.0-flash-live-preview-04-09": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 3e-6, - "input_cost_per_image": 3e-6, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_image": 0.000003, "input_cost_per_token": 5e-7, - "input_cost_per_video_per_second": 3e-6, + "input_cost_per_video_per_second": 0.000003, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10199,8 +10870,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_audio_token": 1.2e-5, - "output_cost_per_token": 2e-6, + "output_cost_per_audio_token": 0.000012, + "output_cost_per_token": 0.000002, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini#gemini-2-0-flash-live-preview-04-09", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], @@ -10250,7 +10921,7 @@ "supports_web_search": true }, "gemini-2.0-flash-thinking-exp": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -10290,7 
+10961,7 @@ "supports_web_search": true }, "gemini-2.0-flash-thinking-exp-01-21": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -10332,8 +11003,8 @@ }, "gemini-2.0-pro-exp-02-05": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10345,8 +11016,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10365,7 +11036,7 @@ }, "gemini-2.5-flash": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10378,8 +11049,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10399,7 +11070,7 @@ }, "gemini-2.5-flash-image": { "cache_read_input_token_cost": 3e-8, - 
"input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10413,8 +11084,8 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -10435,7 +11106,7 @@ }, "gemini-2.5-flash-image-preview": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10449,8 +11120,8 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 3e-5, - "output_cost_per_token": 3e-5, + "output_cost_per_reasoning_token": 0.00003, + "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -10471,16 +11142,17 @@ }, "gemini-3-pro-image-preview": { "input_cost_per_image": 0.0011, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 65536, "max_output_tokens": 32768, "max_tokens": 65536, "mode": "image_generation", "output_cost_per_image": 0.134, - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, + 
"output_cost_per_token_batches": 0.000006, "source": "https://ai.google.dev/gemini-api/docs/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image"], @@ -10562,7 +11234,7 @@ }, "gemini-2.5-flash-preview-09-2025": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10575,8 +11247,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10596,7 +11268,7 @@ }, "gemini-live-2.5-flash-preview-native-audio-09-2025": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 3e-6, + "input_cost_per_audio_token": 0.000003, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10609,8 +11281,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_audio_token": 1.2e-5, - "output_cost_per_token": 2e-6, + "output_cost_per_audio_token": 0.000012, + "output_cost_per_token": 0.000002, "source": "https://ai.google.dev/gemini-api/docs/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10630,7 +11302,7 @@ }, "gemini/gemini-live-2.5-flash-preview-native-audio-09-2025": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 3e-6, + 
"input_cost_per_audio_token": 0.000003, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -10643,8 +11315,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_audio_token": 1.2e-5, - "output_cost_per_token": 2e-6, + "output_cost_per_audio_token": 0.000012, + "output_cost_per_token": 0.000002, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], @@ -10700,7 +11372,7 @@ }, "gemini-2.5-flash-preview-04-17": { "cache_read_input_token_cost": 3.75e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10713,7 +11385,7 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -10733,7 +11405,7 @@ }, "gemini-2.5-flash-preview-05-20": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10746,8 +11418,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10768,8 +11440,8 @@ 
"gemini-2.5-pro": { "cache_read_input_token_cost": 1.25e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10781,8 +11453,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10803,9 +11475,9 @@ "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_above_200k_tokens": 4e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10817,9 +11489,9 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_above_200k_tokens": 1.8e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10840,9 
+11512,9 @@ "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_above_200k_tokens": 4e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "vertex_ai", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10854,9 +11526,9 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_above_200k_tokens": 1.8e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10875,8 +11547,8 @@ }, "gemini-2.5-pro-exp-03-25": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10888,8 +11560,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10908,9 +11580,9 @@ }, "gemini-2.5-pro-preview-03-25": { 
"cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 1.25e-6, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10922,8 +11594,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10942,9 +11614,9 @@ }, "gemini-2.5-pro-preview-05-06": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 1.25e-6, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10956,8 +11628,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], @@ -10977,9 +11649,9 @@ }, "gemini-2.5-pro-preview-06-05": { "cache_read_input_token_cost": 3.125e-7, - 
"input_cost_per_audio_token": 1.25e-6, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10991,8 +11663,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], @@ -11012,8 +11684,8 @@ "gemini-2.5-pro-preview-tts": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -11025,8 +11697,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", "supported_modalities": ["text"], "supported_output_modalities": ["audio"], @@ -11076,7 +11748,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, 
"supports_parallel_function_calling": true, @@ -11108,7 +11780,7 @@ "max_video_length": 2, "max_videos_per_prompt": 1, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11153,7 +11825,7 @@ "tpm": 4000000 }, "gemini/gemini-1.5-flash-001": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 1.875e-8, "deprecation_date": "2025-05-24", "input_cost_per_token": 7.5e-8, @@ -11182,7 +11854,7 @@ "tpm": 4000000 }, "gemini/gemini-1.5-flash-002": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 1.875e-8, "deprecation_date": "2025-09-24", "input_cost_per_token": 7.5e-8, @@ -11339,15 +12011,15 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro": { - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-5, - "output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11359,15 +12031,15 @@ }, "gemini/gemini-1.5-pro-001": { "deprecation_date": "2025-05-24", - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-5, - 
"output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11380,15 +12052,15 @@ }, "gemini/gemini-1.5-pro-002": { "deprecation_date": "2025-09-24", - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-5, - "output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11400,15 +12072,15 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro-exp-0801": { - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-5, - "output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11438,15 +12110,15 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro-latest": { - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 1048576, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-6, - 
"output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11519,7 +12191,7 @@ "tpm": 10000000 }, "gemini/gemini-2.0-flash-exp": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -11620,10 +12292,10 @@ }, "gemini/gemini-2.0-flash-live-001": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 2.1e-6, - "input_cost_per_image": 2.1e-6, + "input_cost_per_audio_token": 0.0000021, + "input_cost_per_image": 0.0000021, "input_cost_per_token": 3.5e-7, - "input_cost_per_video_per_second": 2.1e-6, + "input_cost_per_video_per_second": 0.0000021, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -11635,8 +12307,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_audio_token": 8.5e-6, - "output_cost_per_token": 1.5e-6, + "output_cost_per_audio_token": 0.0000085, + "output_cost_per_token": 0.0000015, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2-0-flash-live-001", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], @@ -11687,7 +12359,7 @@ "tpm": 10000000 }, "gemini/gemini-2.0-flash-thinking-exp": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -11728,7 +12400,7 @@ "tpm": 4000000 }, "gemini/gemini-2.0-flash-thinking-exp-01-21": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -11770,7 +12442,7 @@ 
"tpm": 4000000 }, "gemini/gemini-2.0-pro-exp-02-05": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -11812,7 +12484,7 @@ }, "gemini/gemini-2.5-flash": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -11825,8 +12497,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -11848,7 +12520,7 @@ }, "gemini/gemini-2.5-flash-image": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -11863,8 +12535,8 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -11885,7 +12557,7 @@ }, "gemini/gemini-2.5-flash-image-preview": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -11899,8 
+12571,8 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 3e-5, - "output_cost_per_token": 3e-5, + "output_cost_per_reasoning_token": 0.00003, + "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -11921,18 +12593,19 @@ }, "gemini/gemini-3-pro-image-preview": { "input_cost_per_image": 0.0011, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "gemini", "max_input_tokens": 65536, "max_output_tokens": 32768, "max_tokens": 65536, "mode": "image_generation", "output_cost_per_image": 0.134, - "output_cost_per_token": 1.2e-5, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, "rpm": 1000, "tpm": 4000000, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token_batches": 0.000006, "source": "https://ai.google.dev/gemini-api/docs/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image"], @@ -12018,7 +12691,7 @@ }, "gemini/gemini-2.5-flash-preview-09-2025": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12031,8 +12704,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", "supported_endpoints": 
["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -12054,7 +12727,7 @@ }, "gemini/gemini-flash-latest": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12067,8 +12740,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -12162,7 +12835,7 @@ }, "gemini/gemini-2.5-flash-preview-04-17": { "cache_read_input_token_cost": 3.75e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12175,7 +12848,7 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", @@ -12196,7 +12869,7 @@ }, "gemini/gemini-2.5-flash-preview-05-20": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12209,8 +12882,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 10, "source": 
"https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], @@ -12231,7 +12904,7 @@ }, "gemini/gemini-2.5-flash-preview-tts": { "cache_read_input_token_cost": 3.75e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12244,7 +12917,7 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", @@ -12264,8 +12937,8 @@ }, "gemini/gemini-2.5-pro": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12277,8 +12950,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], @@ -12300,9 +12973,9 @@ "gemini/gemini-3-pro-preview": { "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_above_200k_tokens": 4e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, 
"max_audio_per_prompt": 1, @@ -12314,9 +12987,9 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_above_200k_tokens": 1.8e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -12336,9 +13009,9 @@ "tpm": 800000 }, "gemini/gemini-2.5-pro-exp-03-25": { - "cache_read_input_token_cost": 0.0, - "input_cost_per_token": 0.0, - "input_cost_per_token_above_200k_tokens": 0.0, + "cache_read_input_token_cost": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_200k_tokens": 0, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12350,8 +13023,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.0, - "output_cost_per_token_above_200k_tokens": 0.0, + "output_cost_per_token": 0, + "output_cost_per_token_above_200k_tokens": 0, "rpm": 5, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], @@ -12372,8 +13045,8 @@ "gemini/gemini-2.5-pro-preview-03-25": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12385,8 +13058,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + 
"output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", "supported_modalities": ["text", "image", "audio", "video"], @@ -12405,8 +13078,8 @@ "gemini/gemini-2.5-pro-preview-05-06": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12418,8 +13091,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", "supported_modalities": ["text", "image", "audio", "video"], @@ -12439,8 +13112,8 @@ "gemini/gemini-2.5-pro-preview-06-05": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12452,8 +13125,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", "supported_modalities": ["text", "image", "audio", "video"], @@ -12473,8 +13146,8 @@ "gemini/gemini-2.5-pro-preview-tts": { "cache_read_input_token_cost": 3.125e-7, 
"input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12486,8 +13159,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", "supported_modalities": ["text"], @@ -12566,7 +13239,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-6, + "output_cost_per_token": 0.00000105, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_tool_choice": true, @@ -12578,7 +13251,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-6, + "output_cost_per_token": 0.00000105, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_tool_choice": true, @@ -12592,8 +13265,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-6, - "output_cost_per_token_above_128k_tokens": 2.1e-6, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.0000021, "rpd": 30000, "rpm": 360, "source": "https://ai.google.dev/gemini-api/docs/models/gemini", @@ -12609,8 +13282,8 @@ "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 1.05e-6, - "output_cost_per_token_above_128k_tokens": 2.1e-6, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.0000021, "rpd": 30000, 
"rpm": 360, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -12767,19 +13440,19 @@ "mode": "search" }, "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -12797,19 +13470,19 @@ "tool_use_system_prompt_tokens": 346 }, "global.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, 
+ "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -12827,15 +13500,15 @@ "tool_use_system_prompt_tokens": 159 }, "global.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -12849,13 +13522,13 @@ "tool_use_system_prompt_tokens": 346 }, "gpt-3.5-turbo": { - "input_cost_per_token": 0.5e-6, + "input_cost_per_token": 5e-7, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -12868,7 +13541,7 @@ "max_output_tokens": 4096, "max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -12876,25 +13549,25 @@ "supports_tool_choice": true }, "gpt-3.5-turbo-0301": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openai", "max_input_tokens": 4097, "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_prompt_caching": true, "supports_system_messages": true, "supports_tool_choice": 
true }, "gpt-3.5-turbo-0613": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openai", "max_input_tokens": 4097, "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -12902,13 +13575,13 @@ }, "gpt-3.5-turbo-1106": { "deprecation_date": "2026-09-28", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -12916,55 +13589,55 @@ "supports_tool_choice": true }, "gpt-3.5-turbo-16k": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_prompt_caching": true, "supports_system_messages": true, "supports_tool_choice": true }, "gpt-3.5-turbo-16k-0613": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_prompt_caching": true, "supports_system_messages": true, "supports_tool_choice": true }, "gpt-3.5-turbo-instruct": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "text-completion-openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, 
"gpt-3.5-turbo-instruct-0914": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "text-completion-openai", "max_input_tokens": 8192, "max_output_tokens": 4097, "max_tokens": 4097, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "gpt-4": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -12972,13 +13645,13 @@ }, "gpt-4-0125-preview": { "deprecation_date": "2026-03-26", - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -12986,26 +13659,26 @@ "supports_tool_choice": true }, "gpt-4-0314": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_prompt_caching": true, "supports_system_messages": true, "supports_tool_choice": true }, "gpt-4-0613": { "deprecation_date": "2025-06-06", - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -13013,13 +13686,13 
@@ }, "gpt-4-1106-preview": { "deprecation_date": "2026-03-26", - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -13028,13 +13701,13 @@ }, "gpt-4-1106-vision-preview": { "deprecation_date": "2024-12-06", - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -13042,7 +13715,7 @@ "supports_vision": true }, "gpt-4-32k": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -13054,7 +13727,7 @@ "supports_tool_choice": true }, "gpt-4-32k-0314": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -13066,7 +13739,7 @@ "supports_tool_choice": true }, "gpt-4-32k-0613": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -13078,13 +13751,13 @@ "supports_tool_choice": true }, "gpt-4-turbo": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ 
-13094,13 +13767,13 @@ "supports_vision": true }, "gpt-4-turbo-2024-04-09": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13110,13 +13783,13 @@ "supports_vision": true }, "gpt-4-turbo-preview": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13126,13 +13799,13 @@ }, "gpt-4-vision-preview": { "deprecation_date": "2024-12-06", - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -13142,17 +13815,17 @@ "gpt-4.1": { "cache_read_input_token_cost": 5e-7, "cache_read_input_token_cost_priority": 8.75e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, - "input_cost_per_token_priority": 3.5e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, + "input_cost_per_token_priority": 0.0000035, "litellm_provider": "openai", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, - "output_cost_per_token_priority": 1.4e-5, + "output_cost_per_token": 0.000008, + 
"output_cost_per_token_batches": 0.000004, + "output_cost_per_token_priority": 0.000014, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -13169,15 +13842,15 @@ }, "gpt-4.1-2025-04-14": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "openai", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -13203,9 +13876,9 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "output_cost_per_token_priority": 2.8e-6, + "output_cost_per_token_priority": 0.0000028, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -13229,7 +13902,7 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], @@ -13299,16 +13972,16 @@ "supports_vision": true }, "gpt-4.5-preview": { - "cache_read_input_token_cost": 3.75e-5, - "input_cost_per_token": 7.5e-5, - "input_cost_per_token_batches": 3.75e-5, + "cache_read_input_token_cost": 0.0000375, + "input_cost_per_token": 0.000075, + 
"input_cost_per_token_batches": 0.0000375, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00015, - "output_cost_per_token_batches": 7.5e-5, + "output_cost_per_token_batches": 0.000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13319,17 +13992,17 @@ "supports_vision": true }, "gpt-4.5-preview-2025-02-27": { - "cache_read_input_token_cost": 3.75e-5, + "cache_read_input_token_cost": 0.0000375, "deprecation_date": "2025-07-14", - "input_cost_per_token": 7.5e-5, - "input_cost_per_token_batches": 3.75e-5, + "input_cost_per_token": 0.000075, + "input_cost_per_token_batches": 0.0000375, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00015, - "output_cost_per_token_batches": 7.5e-5, + "output_cost_per_token_batches": 0.000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13340,19 +14013,19 @@ "supports_vision": true }, "gpt-4o": { - "cache_read_input_token_cost": 1.25e-6, - "cache_read_input_token_cost_priority": 2.125e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, - "input_cost_per_token_priority": 4.25e-6, + "cache_read_input_token_cost": 0.00000125, + "cache_read_input_token_cost_priority": 0.000002125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, + "input_cost_per_token_priority": 0.00000425, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, - "output_cost_per_token_priority": 1.7e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, + "output_cost_per_token_priority": 0.000017, 
"supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13364,17 +14037,17 @@ "supports_vision": true }, "gpt-4o-2024-05-13": { - "input_cost_per_token": 5e-6, - "input_cost_per_token_batches": 2.5e-6, - "input_cost_per_token_priority": 8.75e-6, + "input_cost_per_token": 0.000005, + "input_cost_per_token_batches": 0.0000025, + "input_cost_per_token_priority": 0.00000875, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_batches": 7.5e-6, - "output_cost_per_token_priority": 2.625e-5, + "output_cost_per_token": 0.000015, + "output_cost_per_token_batches": 0.0000075, + "output_cost_per_token_priority": 0.00002625, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13384,16 +14057,16 @@ "supports_vision": true }, "gpt-4o-2024-08-06": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13405,16 +14078,16 @@ "supports_vision": true }, "gpt-4o-2024-11-20": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 
128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13427,14 +14100,14 @@ }, "gpt-4o-audio-preview": { "input_cost_per_audio_token": 0.0001, - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_audio_token": 0.0002, - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13444,14 +14117,14 @@ }, "gpt-4o-audio-preview-2024-10-01": { "input_cost_per_audio_token": 0.0001, - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_audio_token": 0.0002, - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13460,15 +14133,15 @@ "supports_tool_choice": true }, "gpt-4o-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13477,15 +14150,15 @@ "supports_tool_choice": true }, 
"gpt-4o-audio-preview-2025-06-03": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13506,7 +14179,7 @@ "mode": "chat", "output_cost_per_token": 6e-7, "output_cost_per_token_batches": 3e-7, - "output_cost_per_token_priority": 1e-6, + "output_cost_per_token_priority": 0.000001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13544,14 +14217,14 @@ "supports_vision": true }, "gpt-4o-mini-audio-preview": { - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 1.5e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 2e-5, + "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 6e-7, "supports_audio_input": true, "supports_audio_output": true, @@ -13561,14 +14234,14 @@ "supports_tool_choice": true }, "gpt-4o-mini-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 1.5e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 2e-5, + "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 6e-7, "supports_audio_input": true, "supports_audio_output": true, @@ -13580,15 +14253,15 @@ "gpt-4o-mini-realtime-preview": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 
3e-7, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13599,15 +14272,15 @@ "gpt-4o-mini-realtime-preview-2024-12-17": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 3e-7, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13662,37 +14335,37 @@ "supports_vision": true }, "gpt-4o-mini-transcribe": { - "input_cost_per_audio_token": 3e-6, - "input_cost_per_token": 1.25e-6, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_token": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supported_endpoints": ["/v1/audio/transcriptions"] }, "gpt-4o-mini-tts": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "mode": "audio_speech", - "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/audio/speech"], "supported_modalities": 
["text", "audio"], "supported_output_modalities": ["audio"] }, "gpt-4o-realtime-preview": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13701,17 +14374,17 @@ "supports_tool_choice": true }, "gpt-4o-realtime-preview-2024-10-01": { - "cache_creation_input_audio_token_cost": 2e-5, - "cache_read_input_token_cost": 2.5e-6, + "cache_creation_input_audio_token_cost": 0.00002, + "cache_read_input_token_cost": 0.0000025, "input_cost_per_audio_token": 0.0001, - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", "output_cost_per_audio_token": 0.0002, - "output_cost_per_token": 2e-5, + "output_cost_per_token": 0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13720,16 +14393,16 @@ "supports_tool_choice": true }, "gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 
0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13738,16 +14411,16 @@ "supports_tool_choice": true }, "gpt-4o-realtime-preview-2025-06-03": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13756,16 +14429,16 @@ "supports_tool_choice": true }, "gpt-4o-search-preview": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, "search_context_cost_per_query": { "search_context_size_high": 0.05, "search_context_size_low": 0.03, @@ -13782,16 +14455,16 @@ "supports_web_search": true }, "gpt-4o-search-preview-2025-03-11": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - 
"output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13802,30 +14475,30 @@ "supports_vision": true }, "gpt-4o-transcribe": { - "input_cost_per_audio_token": 6e-6, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.000006, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/audio/transcriptions"] }, "gpt-5": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_flex": 6.25e-8, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "input_cost_per_token_flex": 6.25e-7, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_flex": 5e-6, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_flex": 0.000005, + "output_cost_per_token_priority": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -13844,15 +14517,15 @@ "gpt-5.1": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": 
"chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -13871,15 +14544,15 @@ "gpt-5.1-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -13898,15 +14571,15 @@ "gpt-5.1-chat-latest": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "image"], @@ -13922,15 +14595,15 @@ "supports_vision": true }, "gpt-5-pro": { - "input_cost_per_token": 1.5e-5, - "input_cost_per_token_batches": 7.5e-6, + "input_cost_per_token": 0.000015, + 
"input_cost_per_token_batches": 0.0000075, "litellm_provider": "openai", "max_input_tokens": 400000, "max_output_tokens": 272000, "max_tokens": 272000, "mode": "responses", - "output_cost_per_token": 1.2e-4, - "output_cost_per_token_batches": 6e-5, + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 0.00006, "supported_endpoints": ["/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -13947,15 +14620,15 @@ "supports_web_search": true }, "gpt-5-pro-2025-10-06": { - "input_cost_per_token": 1.5e-5, - "input_cost_per_token_batches": 7.5e-6, + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, "litellm_provider": "openai", "max_input_tokens": 400000, "max_output_tokens": 272000, "max_tokens": 272000, "mode": "responses", - "output_cost_per_token": 1.2e-4, - "output_cost_per_token_batches": 6e-5, + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 0.00006, "supported_endpoints": ["/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -13975,17 +14648,17 @@ "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_flex": 6.25e-8, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "input_cost_per_token_flex": 6.25e-7, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_flex": 5e-6, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_flex": 0.000005, + "output_cost_per_token_priority": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], 
"supported_output_modalities": ["text"], @@ -14003,13 +14676,13 @@ }, "gpt-5-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -14026,13 +14699,13 @@ }, "gpt-5-chat-latest": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -14049,13 +14722,13 @@ }, "gpt-5-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -14073,15 +14746,38 @@ "gpt-5.1-codex": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - 
"output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -14106,8 +14802,8 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2e-6, - "output_cost_per_token_priority": 3.6e-6, + "output_cost_per_token": 0.000002, + "output_cost_per_token_priority": 0.0000036, "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -14134,9 +14830,9 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, - "output_cost_per_token_flex": 1e-6, - "output_cost_per_token_priority": 3.6e-6, + "output_cost_per_token": 0.000002, + "output_cost_per_token_flex": 0.000001, + "output_cost_per_token_priority": 0.0000036, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -14164,9 +14860,9 @@ "max_output_tokens": 128000, "max_tokens": 128000, 
"mode": "chat", - "output_cost_per_token": 2e-6, - "output_cost_per_token_flex": 1e-6, - "output_cost_per_token_priority": 3.6e-6, + "output_cost_per_token": 0.000002, + "output_cost_per_token_flex": 0.000001, + "output_cost_per_token_priority": 0.0000036, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -14187,7 +14883,7 @@ "cache_read_input_token_cost_flex": 2.5e-9, "input_cost_per_token": 5e-8, "input_cost_per_token_flex": 2.5e-8, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, @@ -14239,32 +14935,32 @@ "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "gpt-image-1-mini": { "cache_read_input_image_token_cost": 2.5e-7, "cache_read_input_token_cost": 2e-7, - "input_cost_per_image_token": 2.5e-6, - "input_cost_per_token": 2e-6, + "input_cost_per_image_token": 0.0000025, + "input_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", - "output_cost_per_image_token": 8e-6, + "output_cost_per_image_token": 0.000008, "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 4e-7, "cache_read_input_token_cost": 4e-7, - "input_cost_per_audio_token": 3.2e-5, - "input_cost_per_image": 5e-6, - "input_cost_per_token": 4e-6, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, "litellm_provider": "openai", "max_input_tokens": 32000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 6.4e-5, - "output_cost_per_token": 1.6e-5, + "output_cost_per_audio_token": 0.000064, + 
"output_cost_per_token": 0.000016, "supported_endpoints": ["/v1/realtime"], "supported_modalities": ["text", "image", "audio"], "supported_output_modalities": ["text", "audio"], @@ -14278,15 +14974,15 @@ "gpt-realtime-mini": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_audio_token_cost": 3e-7, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supported_endpoints": ["/v1/realtime"], "supported_modalities": ["text", "image", "audio"], "supported_output_modalities": ["text", "audio"], @@ -14300,16 +14996,16 @@ "gpt-realtime-2025-08-28": { "cache_creation_input_audio_token_cost": 4e-7, "cache_read_input_token_cost": 4e-7, - "input_cost_per_audio_token": 3.2e-5, - "input_cost_per_image": 5e-6, - "input_cost_per_token": 4e-6, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, "litellm_provider": "openai", "max_input_tokens": 32000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 6.4e-5, - "output_cost_per_token": 1.6e-5, + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.000016, "supported_endpoints": ["/v1/realtime"], "supported_modalities": ["text", "image", "audio"], "supported_output_modalities": ["text", "audio"], @@ -14329,11 +15025,11 @@ "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3-opus": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "gradient_ai", "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supported_endpoints": ["/v1/chat/completions"], 
"supported_modalities": ["text"], "supports_tool_choice": false @@ -14343,27 +15039,27 @@ "litellm_provider": "gradient_ai", "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "gradient_ai", "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.7-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "gradient_ai", "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text"], "supports_tool_choice": false @@ -14425,21 +15121,21 @@ "supports_tool_choice": false }, "gradient_ai/openai-o3": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "gradient_ai", "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-o3-mini": { - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "gradient_ai", "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supported_endpoints": ["/v1/chat/completions"], "supported_modalities": ["text"], "supports_tool_choice": false @@ -14514,14 +15210,14 @@ "output_cost_per_token": 9.9e-7, "supports_function_calling": true, "supports_reasoning": true, - 
"supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/distil-whisper-large-v3-en": { - "input_cost_per_second": 5.56e-6, + "input_cost_per_second": 0.00000556, "litellm_provider": "groq", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "groq/gemma-7b-it": { "deprecation_date": "2024-12-18", @@ -14533,7 +15229,7 @@ "mode": "chat", "output_cost_per_token": 7e-8, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/gemma2-9b-it": { @@ -14545,7 +15241,7 @@ "mode": "chat", "output_cost_per_token": 2e-7, "supports_function_calling": false, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": false }, "groq/llama-3.1-405b-reasoning": { @@ -14557,7 +15253,7 @@ "mode": "chat", "output_cost_per_token": 7.9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.1-70b-versatile": { @@ -14570,7 +15266,7 @@ "mode": "chat", "output_cost_per_token": 7.9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.1-8b-instant": { @@ -14582,7 +15278,7 @@ "mode": "chat", "output_cost_per_token": 8e-8, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-11b-text-preview": { @@ -14595,7 +15291,7 @@ "mode": "chat", "output_cost_per_token": 1.8e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-11b-vision-preview": { @@ -14608,7 +15304,7 @@ "mode": "chat", "output_cost_per_token": 1.8e-7, "supports_function_calling": true, - 
"supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true, "supports_vision": true }, @@ -14622,7 +15318,7 @@ "mode": "chat", "output_cost_per_token": 4e-8, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-3b-preview": { @@ -14635,7 +15331,7 @@ "mode": "chat", "output_cost_per_token": 6e-8, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-90b-text-preview": { @@ -14648,7 +15344,7 @@ "mode": "chat", "output_cost_per_token": 9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-90b-vision-preview": { @@ -14661,7 +15357,7 @@ "mode": "chat", "output_cost_per_token": 9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true, "supports_vision": true }, @@ -14685,7 +15381,7 @@ "mode": "chat", "output_cost_per_token": 7.9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-guard-3-8b": { @@ -14706,7 +15402,7 @@ "mode": "chat", "output_cost_per_token": 8e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama3-groq-70b-8192-tool-use-preview": { @@ -14719,7 +15415,7 @@ "mode": "chat", "output_cost_per_token": 8.9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama3-groq-8b-8192-tool-use-preview": { @@ -14732,7 +15428,7 @@ "mode": "chat", "output_cost_per_token": 1.9e-7, "supports_function_calling": true, - 
"supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/meta-llama/llama-4-maverick-17b-128e-instruct": { @@ -14778,25 +15474,25 @@ "mode": "chat", "output_cost_per_token": 2.4e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/moonshotai/kimi-k2-instruct": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "groq", "max_input_tokens": 131072, "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "groq/moonshotai/kimi-k2-instruct-0905": { - "input_cost_per_token": 1e-6, - "output_cost_per_token": 3e-6, - "cache_read_input_token_cost": 0.5e-6, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "groq", "max_input_tokens": 262144, "max_output_tokens": 16384, @@ -14837,7 +15533,7 @@ "supports_web_search": true }, "groq/playai-tts": { - "input_cost_per_character": 5e-5, + "input_cost_per_character": 0.00005, "litellm_provider": "groq", "max_input_tokens": 10000, "max_output_tokens": 10000, @@ -14854,38 +15550,38 @@ "output_cost_per_token": 5.9e-7, "supports_function_calling": true, "supports_reasoning": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/whisper-large-v3": { - "input_cost_per_second": 3.083e-5, + "input_cost_per_second": 0.00003083, "litellm_provider": "groq", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "groq/whisper-large-v3-turbo": { - "input_cost_per_second": 1.111e-5, + "input_cost_per_second": 0.00001111, "litellm_provider": "groq", "mode": "audio_transcription", - 
"output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "hd/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 6.539e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "hd/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 6.539e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "heroku/claude-3-5-haiku": { "litellm_provider": "heroku", @@ -14923,21 +15619,21 @@ "input_cost_per_pixel": 1.59263611e-7, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { @@ -14993,13 +15689,13 @@ "supports_tool_choice": true }, "hyperbolic/Qwen/Qwen3-235B-A22B": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "hyperbolic", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, @@ -15136,44 +15832,44 @@ "supports_tool_choice": true }, 
"hyperbolic/moonshotai/Kimi-K2-Instruct": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "hyperbolic", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, "supports_tool_choice": true }, "j2-light": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "ai21", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 3e-6 + "output_cost_per_token": 0.000003 }, "j2-mid": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "ai21", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "j2-ultra": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "ai21", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "jamba-1.5": { "input_cost_per_token": 2e-7, @@ -15186,23 +15882,23 @@ "supports_tool_choice": true }, "jamba-1.5-large": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "ai21", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "jamba-1.5-large@001": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "ai21", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, 
"supports_tool_choice": true }, "jamba-1.5-mini": { @@ -15226,23 +15922,23 @@ "supports_tool_choice": true }, "jamba-large-1.6": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "ai21", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "jamba-large-1.7": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "ai21", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "jamba-mini-1.6": { @@ -15276,19 +15972,19 @@ "output_cost_per_token": 1.8e-8 }, "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 4.125e-6, + "cache_creation_input_token_cost": 0.000004125, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 3.3e-6, - "input_cost_per_token_above_200k_tokens": 6.6e-6, - "output_cost_per_token_above_200k_tokens": 2.475e-5, - "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -15306,15 +16002,15 @@ "tool_use_system_prompt_tokens": 346 }, "jp.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 
0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -15596,50 +16292,50 @@ "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "luminous-base": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "aleph_alpha", "max_tokens": 2048, "mode": "completion", - "output_cost_per_token": 3.3e-5 + "output_cost_per_token": 0.000033 }, "luminous-base-control": { - "input_cost_per_token": 3.75e-5, + "input_cost_per_token": 0.0000375, "litellm_provider": "aleph_alpha", "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 4.125e-5 + "output_cost_per_token": 0.00004125 }, "luminous-extended": { - "input_cost_per_token": 4.5e-5, + "input_cost_per_token": 0.000045, "litellm_provider": "aleph_alpha", "max_tokens": 2048, "mode": "completion", - "output_cost_per_token": 4.95e-5 + "output_cost_per_token": 0.0000495 }, "luminous-extended-control": { - 
"input_cost_per_token": 5.625e-5, + "input_cost_per_token": 0.00005625, "litellm_provider": "aleph_alpha", "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 6.1875e-5 + "output_cost_per_token": 0.000061875 }, "luminous-supreme": { "input_cost_per_token": 0.000175, @@ -15673,21 +16369,21 @@ "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1024/gpt-image-1-mini": { @@ -15727,13 +16423,13 @@ "supported_endpoints": ["/v1/images/generations"] }, "medlm-large": { - "input_cost_per_character": 5e-6, + "input_cost_per_character": 0.000005, "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "chat", - "output_cost_per_character": 1.5e-5, + "output_cost_per_character": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_tool_choice": true }, @@ -15744,7 +16440,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1e-6, + "output_cost_per_character": 0.000001, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_tool_choice": true }, @@ -15755,25 +16451,25 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6 + 
"output_cost_per_token": 0.000001 }, "meta.llama2-70b-chat-v1": { - "input_cost_per_token": 1.95e-6, + "input_cost_per_token": 0.00000195, "litellm_provider": "bedrock", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.56e-6 + "output_cost_per_token": 0.00000256 }, "meta.llama3-1-405b-instruct-v1:0": { - "input_cost_per_token": 5.32e-6, + "input_cost_per_token": 0.00000532, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.6e-5, + "output_cost_per_token": 0.000016, "supports_function_calling": true, "supports_tool_choice": false }, @@ -15834,13 +16530,13 @@ "supports_tool_choice": false }, "meta.llama3-2-90b-instruct-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": false, "supports_vision": true @@ -15857,13 +16553,13 @@ "supports_tool_choice": false }, "meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3.5e-6 + "output_cost_per_token": 0.0000035 }, "meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3e-7, @@ -15963,34 +16659,34 @@ "supports_tool_choice": true }, "mistral.mistral-large-2402-v1:0": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, 
"mistral.mistral-large-2407-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 9e-6, + "output_cost_per_token": 0.000009, "supports_function_calling": true, "supports_tool_choice": true }, "mistral.mistral-small-2402-v1:0": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true }, "mistral.mixtral-8x7b-instruct-v0:1": { @@ -16004,25 +16700,25 @@ "supports_tool_choice": true }, "mistral/codestral-2405": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_assistant_prefill": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/codestral-latest": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_assistant_prefill": true, "supports_response_schema": true, "supports_tool_choice": true @@ -16046,7 +16742,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://mistral.ai/news/devstral", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16082,13 +16778,13 @@ "supports_tool_choice": true }, "mistral/magistral-medium-2506": { - "input_cost_per_token": 2e-6, + 
"input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 40000, "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://mistral.ai/news/magistral", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16097,13 +16793,13 @@ "supports_tool_choice": true }, "mistral/magistral-medium-2509": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 40000, "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://mistral.ai/news/magistral", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16113,28 +16809,28 @@ }, "mistral/mistral-ocr-latest": { "litellm_provider": "mistral", - "ocr_cost_per_page": 1e-3, - "annotation_cost_per_page": 3e-3, + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, "mode": "ocr", "supported_endpoints": ["/v1/ocr"], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/mistral-ocr-2505-completion": { "litellm_provider": "mistral", - "ocr_cost_per_page": 1e-3, - "annotation_cost_per_page": 3e-3, + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, "mode": "ocr", "supported_endpoints": ["/v1/ocr"], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/magistral-medium-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 40000, "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://mistral.ai/news/magistral", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16149,7 +16845,7 @@ "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + 
"output_cost_per_token": 0.0000015, "source": "https://mistral.ai/pricing#api-pricing", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16164,7 +16860,7 @@ "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://mistral.ai/pricing#api-pricing", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16180,91 +16876,106 @@ "mode": "embedding" }, "mistral/codestral-embed": { - "input_cost_per_token": 0.15e-6, + "input_cost_per_token": 1.5e-7, "litellm_provider": "mistral", "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding" }, "mistral/codestral-embed-2505": { - "input_cost_per_token": 0.15e-6, + "input_cost_per_token": 1.5e-7, "litellm_provider": "mistral", "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding" }, "mistral/mistral-large-2402": { - "input_cost_per_token": 4e-6, + "input_cost_per_token": 0.000004, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/mistral-large-2407": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 9e-6, + "output_cost_per_token": 0.000009, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/mistral-large-2411": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - 
"output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/mistral-large-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/mistral-large-3": { + "input_cost_per_token": 5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "mistral/mistral-medium": { - "input_cost_per_token": 2.7e-6, + "input_cost_per_token": 0.0000027, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 8.1e-6, + "output_cost_per_token": 0.0000081, "supports_assistant_prefill": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/mistral-medium-2312": { - "input_cost_per_token": 2.7e-6, + "input_cost_per_token": 0.0000027, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 8.1e-6, + "output_cost_per_token": 0.0000081, "supports_assistant_prefill": true, "supports_response_schema": true, "supports_tool_choice": true @@ -16276,7 +16987,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - 
"output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16289,7 +17000,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16384,13 +17095,13 @@ "supports_tool_choice": true }, "mistral/open-mixtral-8x22b": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 65336, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16424,13 +17135,13 @@ "supports_vision": true }, "mistral/pixtral-large-2411": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16438,13 +17149,13 @@ "supports_vision": true }, "mistral/pixtral-large-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16459,7 +17170,35 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, + "source": 
"https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-0905-preview": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-turbo-preview": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 0.00000115, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000008, "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_function_calling": true, "supports_tool_choice": true, @@ -16467,13 +17206,13 @@ }, "moonshot/kimi-latest": { "cache_read_input_token_cost": 1.5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, @@ -16481,13 +17220,13 @@ }, "moonshot/kimi-latest-128k": { "cache_read_input_token_cost": 1.5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", 
"supports_function_calling": true, "supports_tool_choice": true, @@ -16495,13 +17234,13 @@ }, "moonshot/kimi-latest-32k": { "cache_read_input_token_cost": 1.5e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "moonshot", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, @@ -16515,21 +17254,22 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "moonshot/kimi-thinking-preview": { - "input_cost_per_token": 3e-5, + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 6e-7, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 3e-5, - "source": "https://platform.moonshot.ai/docs/pricing", + "output_cost_per_token": 0.0000025, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_vision": true }, "moonshot/kimi-k2-thinking": { @@ -16540,81 +17280,95 @@ "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-thinking-turbo": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 0.00000115, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": 
"chat", + "output_cost_per_token": 0.000008, "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "moonshot/moonshot-v1-128k": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "moonshot/moonshot-v1-128k-0430": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "moonshot/moonshot-v1-128k-vision-preview": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "moonshot/moonshot-v1-32k": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "moonshot", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "moonshot/moonshot-v1-32k-0430": { - "input_cost_per_token": 
1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "moonshot", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "moonshot/moonshot-v1-32k-vision-preview": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "moonshot", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, @@ -16627,7 +17381,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -16639,7 +17393,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -16651,20 +17405,20 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "moonshot/moonshot-v1-auto": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", 
"supports_function_calling": true, "supports_tool_choice": true @@ -16676,7 +17430,7 @@ "max_output_tokens": 16000, "max_tokens": 16000, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "supports_function_calling": false, "supports_parallel_function_calling": false, "supports_system_messages": true, @@ -16690,7 +17444,7 @@ "max_output_tokens": 16000, "max_tokens": 16000, "mode": "chat", - "output_cost_per_token": 1.9e-6, + "output_cost_per_token": 0.0000019, "supports_function_calling": false, "supports_parallel_function_calling": false, "supports_system_messages": true, @@ -16763,7 +17517,7 @@ "input_cost_per_pixel": 1.3e-9, "litellm_provider": "nscale", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", "supported_endpoints": ["/v1/images/generations"] }, @@ -16868,19 +17622,19 @@ "input_cost_per_pixel": 3e-9, "litellm_provider": "nscale", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", "supported_endpoints": ["/v1/images/generations"] }, "o1": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -16892,14 +17646,14 @@ "supports_vision": true }, "o1-2024-12-17": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openai", "max_input_tokens": 
200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -16912,55 +17666,55 @@ }, "o1-mini": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_vision": true }, "o1-mini-2024-09-12": { "deprecation_date": "2025-10-27", - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 3e-6, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_vision": true }, "o1-preview": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_vision": true }, "o1-preview-2024-09-12": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - 
"output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, @@ -16968,7 +17722,7 @@ }, "o1-pro": { "input_cost_per_token": 0.00015, - "input_cost_per_token_batches": 7.5e-5, + "input_cost_per_token_batches": 0.000075, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, @@ -16992,7 +17746,7 @@ }, "o1-pro-2025-03-19": { "input_cost_per_token": 0.00015, - "input_cost_per_token_batches": 7.5e-5, + "input_cost_per_token_batches": 0.000075, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, @@ -17018,17 +17772,17 @@ "cache_read_input_token_cost": 5e-7, "cache_read_input_token_cost_flex": 2.5e-7, "cache_read_input_token_cost_priority": 8.75e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_flex": 1e-6, - "input_cost_per_token_priority": 3.5e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_flex": 0.000001, + "input_cost_per_token_priority": 0.0000035, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_flex": 4e-6, - "output_cost_per_token_priority": 1.4e-5, + "output_cost_per_token": 0.000008, + "output_cost_per_token_flex": 0.000004, + "output_cost_per_token_priority": 0.000014, "supported_endpoints": [ "/v1/responses", "/v1/chat/completions", @@ -17049,13 +17803,13 @@ }, "o3-2025-04-16": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supported_endpoints": [ "/v1/responses", "/v1/chat/completions", @@ -17075,16 +17829,16 @@ "supports_vision": true }, "o3-deep-research": { - 
"cache_read_input_token_cost": 2.5e-6, - "input_cost_per_token": 1e-5, - "input_cost_per_token_batches": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.000005, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 4e-5, - "output_cost_per_token_batches": 2e-5, + "output_cost_per_token": 0.00004, + "output_cost_per_token_batches": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -17099,16 +17853,16 @@ "supports_vision": true }, "o3-deep-research-2025-06-26": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_token": 1e-5, - "input_cost_per_token_batches": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.000005, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 4e-5, - "output_cost_per_token_batches": 2e-5, + "output_cost_per_token": 0.00004, + "output_cost_per_token_batches": 0.00002, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -17124,13 +17878,13 @@ }, "o3-mini": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -17141,13 +17895,13 @@ }, "o3-mini-2025-01-31": { "cache_read_input_token_cost": 
5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -17157,15 +17911,15 @@ "supports_vision": false }, "o3-pro": { - "input_cost_per_token": 2e-5, - "input_cost_per_token_batches": 1e-5, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-5, - "output_cost_per_token_batches": 4e-5, + "output_cost_per_token": 0.00008, + "output_cost_per_token_batches": 0.00004, "supported_endpoints": ["/v1/responses", "/v1/batch"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -17179,15 +17933,15 @@ "supports_vision": true }, "o3-pro-2025-06-10": { - "input_cost_per_token": 2e-5, - "input_cost_per_token_batches": 1e-5, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-5, - "output_cost_per_token_batches": 4e-5, + "output_cost_per_token": 0.00008, + "output_cost_per_token_batches": 0.00004, "supported_endpoints": ["/v1/responses", "/v1/batch"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -17204,17 +17958,17 @@ "cache_read_input_token_cost": 2.75e-7, "cache_read_input_token_cost_flex": 1.375e-7, "cache_read_input_token_cost_priority": 5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "input_cost_per_token_flex": 5.5e-7, - "input_cost_per_token_priority": 2e-6, + 
"input_cost_per_token_priority": 0.000002, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, - "output_cost_per_token_flex": 2.2e-6, - "output_cost_per_token_priority": 8e-6, + "output_cost_per_token": 0.0000044, + "output_cost_per_token_flex": 0.0000022, + "output_cost_per_token_priority": 0.000008, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17227,13 +17981,13 @@ }, "o4-mini-2025-04-16": { "cache_read_input_token_cost": 2.75e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17246,15 +18000,15 @@ }, "o4-mini-deep-research": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -17270,15 +18024,15 @@ }, "o4-mini-deep-research-2025-06-26": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "openai", "max_input_tokens": 200000, 
"max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], @@ -17293,25 +18047,25 @@ "supports_vision": true }, "oci/meta.llama-3.1-405b-instruct": { - "input_cost_per_token": 1.068e-5, + "input_cost_per_token": 0.00001068, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.068e-5, + "output_cost_per_token": 0.00001068, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false }, "oci/meta.llama-3.2-90b-vision-instruct": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false @@ -17353,7 +18107,7 @@ "supports_response_schema": false }, "oci/xai.grok-3": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "oci", "max_input_tokens": 131072, "max_output_tokens": 131072, @@ -17365,13 +18119,13 @@ "supports_response_schema": false }, "oci/xai.grok-3-fast": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "oci", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-5, + "output_cost_per_token": 0.000025, "source": 
"https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false @@ -17395,13 +18149,13 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false }, "oci/xai.grok-4": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -17413,345 +18167,345 @@ "supports_response_schema": false }, "oci/cohere.command-latest": { - "input_cost_per_token": 1.56e-6, + "input_cost_per_token": 0.00000156, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.56e-6, + "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", "supports_function_calling": true, "supports_response_schema": false }, "oci/cohere.command-a-03-2025": { - "input_cost_per_token": 1.56e-6, + "input_cost_per_token": 0.00000156, "litellm_provider": "oci", "max_input_tokens": 256000, "max_output_tokens": 4000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.56e-6, + "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", "supports_function_calling": true, "supports_response_schema": false }, "oci/cohere.command-plus-latest": { - "input_cost_per_token": 1.56e-6, + "input_cost_per_token": 0.00000156, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.56e-6, + "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", 
"supports_function_calling": true, "supports_response_schema": false }, "ollama/codegeex4": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": false }, "ollama/codegemma": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/codellama": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/deepseek-coder-v2-base": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/deepseek-coder-v2-instruct": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/deepseek-coder-v2-lite-base": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/deepseek-coder-v2-lite-instruct": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, 
"litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/deepseek-v3.1:671b-cloud": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 163840, "max_output_tokens": 163840, "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/gpt-oss:120b-cloud": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/gpt-oss:20b-cloud": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/internlm2_5-20b-chat": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/llama2": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama2-uncensored": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + 
"output_cost_per_token": 0 }, "ollama/llama2:13b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama2:70b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama2:7b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama3": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama3.1": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/llama3:70b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama3:8b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/mistral": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, 
"litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mistral-7B-Instruct-v0.1": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mistral-7B-Instruct-v0.2": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mistral-large-instruct-2407": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 65536, "max_output_tokens": 8192, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mixtral-8x22B-Instruct-v0.1": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 65536, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mixtral-8x7B-Instruct-v0.1": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/orca-mini": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 
4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/qwen3-coder:480b-cloud": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 262144, "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/vicuna": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 2048, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "omni-moderation-2024-09-26": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "omni-moderation-latest": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "omni-moderation-latest-intents": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "openai.gpt-oss-120b-1:0": { "input_cost_per_token": 1.5e-7, @@ -17780,31 +18534,31 @@ "supports_tool_choice": true }, "openrouter/anthropic/claude-2": { - "input_cost_per_token": 1.102e-5, + "input_cost_per_token": 0.00001102, "litellm_provider": "openrouter", "max_output_tokens": 8191, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 3.268e-5, + "output_cost_per_token": 0.00003268, "supports_tool_choice": true }, "openrouter/anthropic/claude-3-5-haiku": { - 
"input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_function_calling": true, "supports_tool_choice": true }, "openrouter/anthropic/claude-3-5-haiku-20241022": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_function_calling": true, "supports_tool_choice": true, "tool_use_system_prompt_tokens": 264 @@ -17815,7 +18569,7 @@ "litellm_provider": "openrouter", "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true @@ -17827,20 +18581,20 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 264 }, "openrouter/anthropic/claude-3-opus": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, @@ -17848,23 +18602,23 @@ }, "openrouter/anthropic/claude-3-sonnet": { "input_cost_per_image": 0.0048, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, 
"supports_tool_choice": true, "supports_vision": true }, "openrouter/anthropic/claude-3.5-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17873,13 +18627,13 @@ "tool_use_system_prompt_tokens": 159 }, "openrouter/anthropic/claude-3.5-sonnet:beta": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_computer_use": true, "supports_function_calling": true, "supports_tool_choice": true, @@ -17888,13 +18642,13 @@ }, "openrouter/anthropic/claude-3.7-sonnet": { "input_cost_per_image": 0.0048, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17905,13 +18659,13 @@ }, "openrouter/anthropic/claude-3.7-sonnet:beta": { "input_cost_per_image": 0.0048, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_computer_use": true, "supports_function_calling": true, "supports_reasoning": true, @@ -17920,25 +18674,25 @@ "tool_use_system_prompt_tokens": 159 }, 
"openrouter/anthropic/claude-instant-v1": { - "input_cost_per_token": 1.63e-6, + "input_cost_per_token": 0.00000163, "litellm_provider": "openrouter", "max_output_tokens": 8191, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 5.51e-6, + "output_cost_per_token": 0.00000551, "supports_tool_choice": true }, "openrouter/anthropic/claude-opus-4": { "input_cost_per_image": 0.0048, - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17950,16 +18704,16 @@ }, "openrouter/anthropic/claude-opus-4.1": { "input_cost_per_image": 0.0048, - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 3e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17971,19 +18725,38 @@ }, "openrouter/anthropic/claude-sonnet-4": { "input_cost_per_image": 0.0048, - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_creation_input_token_cost": 0.00000375, + 
"cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, "litellm_provider": "openrouter", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000025, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17995,19 +18768,19 @@ }, "openrouter/anthropic/claude-sonnet-4.5": { "input_cost_per_image": 0.0048, - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 
0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "openrouter", "max_input_tokens": 1000000, "max_output_tokens": 1000000, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -18018,15 +18791,15 @@ "tool_use_system_prompt_tokens": 159 }, "openrouter/anthropic/claude-haiku-4.5": { - "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost": 0.00000125, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 200000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -18056,11 +18829,11 @@ "supports_tool_choice": true }, "openrouter/cohere/command-r-plus": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_tool_choice": true }, "openrouter/databricks/dbrx-instruct": { @@ -18108,6 +18881,21 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "openrouter/deepseek/deepseek-v3.2": { + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "openrouter/deepseek/deepseek-v3.2-exp": { "input_cost_per_token": 2e-7, 
"input_cost_per_token_cache_hit": 2e-8, @@ -18142,7 +18930,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -18157,7 +18945,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.15e-6, + "output_cost_per_token": 0.00000215, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -18208,7 +18996,7 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "supports_audio_output": true, "supports_function_calling": true, "supports_response_schema": true, @@ -18218,7 +19006,7 @@ }, "openrouter/google/gemini-2.5-pro": { "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -18230,7 +19018,7 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_audio_output": true, "supports_function_calling": true, "supports_response_schema": true, @@ -18242,9 +19030,9 @@ "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_above_200k_tokens": 4e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "openrouter", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -18256,9 +19044,9 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - 
"output_cost_per_token": 1.2e-5, - "output_cost_per_token_above_200k_tokens": 1.8e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], "supported_output_modalities": ["text"], @@ -18276,13 +19064,13 @@ }, "openrouter/google/gemini-pro-1.5": { "input_cost_per_image": 0.00265, - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openrouter", "max_input_tokens": 1000000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.5e-6, + "output_cost_per_token": 0.0000075, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true @@ -18315,27 +19103,27 @@ "supports_tool_choice": true }, "openrouter/gryphe/mythomax-l2-13b": { - "input_cost_per_token": 1.875e-6, + "input_cost_per_token": 0.000001875, "litellm_provider": "openrouter", "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.875e-6, + "output_cost_per_token": 0.000001875, "supports_tool_choice": true }, "openrouter/jondurbin/airoboros-l2-70b-2.1": { - "input_cost_per_token": 1.3875e-5, + "input_cost_per_token": 0.000013875, "litellm_provider": "openrouter", "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.3875e-5, + "output_cost_per_token": 0.000013875, "supports_tool_choice": true }, "openrouter/mancer/weaver": { - "input_cost_per_token": 5.625e-6, + "input_cost_per_token": 0.000005625, "litellm_provider": "openrouter", "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 5.625e-6, + "output_cost_per_token": 0.000005625, "supports_tool_choice": true }, "openrouter/meta-llama/codellama-34b-instruct": { @@ -18355,11 +19143,11 @@ "supports_tool_choice": true }, "openrouter/meta-llama/llama-2-70b-chat": { - 
"input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openrouter", "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_tool_choice": true }, "openrouter/meta-llama/llama-3-70b-instruct": { @@ -18383,23 +19171,23 @@ "litellm_provider": "openrouter", "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 2.25e-6, + "output_cost_per_token": 0.00000225, "supports_tool_choice": true }, "openrouter/meta-llama/llama-3-8b-instruct:free": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openrouter", "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_tool_choice": true }, "openrouter/microsoft/wizardlm-2-8x22b:nitro": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "supports_tool_choice": true }, "openrouter/minimax/minimax-m2": { @@ -18409,7 +19197,7 @@ "max_output_tokens": 204800, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.02e-6, + "output_cost_per_token": 0.00000102, "supports_function_calling": true, "supports_prompt_caching": false, "supports_reasoning": true, @@ -18424,19 +19212,19 @@ "supports_tool_choice": true }, "openrouter/mistralai/mistral-7b-instruct:free": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openrouter", "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_tool_choice": true }, "openrouter/mistralai/mistral-large": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "openrouter", "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, 
"openrouter/mistralai/mistral-small-3.1-24b-instruct": { @@ -18472,49 +19260,49 @@ "supports_tool_choice": true }, "openrouter/openai/gpt-3.5-turbo": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openrouter", "max_tokens": 4095, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_tool_choice": true }, "openrouter/openai/gpt-3.5-turbo-16k": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_tokens": 16383, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_tool_choice": true }, "openrouter/openai/gpt-4": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openrouter", "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_tool_choice": true }, "openrouter/openai/gpt-4-vision-preview": { "input_cost_per_image": 0.01445, - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openrouter", "max_tokens": 130000, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "openrouter/openai/gpt-4.1": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "openrouter", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18525,13 +19313,13 @@ }, "openrouter/openai/gpt-4.1-2025-04-14": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "openrouter", "max_input_tokens": 
1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18548,7 +19336,7 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18565,7 +19353,7 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18609,26 +19397,26 @@ "supports_vision": true }, "openrouter/openai/gpt-4o": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "openrouter/openai/gpt-4o-2024-05-13": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, @@ -18636,13 +19424,13 @@ }, "openrouter/openai/gpt-5-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": 
"chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supports_reasoning": true, @@ -18650,13 +19438,13 @@ }, "openrouter/openai/gpt-5-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supports_reasoning": true, @@ -18664,13 +19452,13 @@ }, "openrouter/openai/gpt-5": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supports_reasoning": true, @@ -18684,7 +19472,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supports_reasoning": true, @@ -18735,14 +19523,14 @@ "supports_tool_choice": true }, "openrouter/openai/o1": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18752,65 
+19540,65 @@ "supports_vision": true }, "openrouter/openai/o1-mini": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": false }, "openrouter/openai/o1-mini-2024-09-12": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": false }, "openrouter/openai/o1-preview": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": false }, "openrouter/openai/o1-preview-2024-09-12": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": false }, "openrouter/openai/o3-mini": { - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, 
"max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_reasoning": true, @@ -18818,13 +19606,13 @@ "supports_vision": false }, "openrouter/openai/o3-mini-high": { - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_reasoning": true, @@ -18832,11 +19620,11 @@ "supports_vision": false }, "openrouter/pygmalionai/mythalion-13b": { - "input_cost_per_token": 1.875e-6, + "input_cost_per_token": 0.000001875, "litellm_provider": "openrouter", "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.875e-6, + "output_cost_per_token": 0.000001875, "supports_tool_choice": true }, "openrouter/qwen/qwen-2.5-coder-32b-instruct": { @@ -18879,26 +19667,26 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 3.4e-6, + "output_cost_per_token": 0.0000034, "source": "https://openrouter.ai/switchpoint/router", "supports_tool_choice": true }, "openrouter/undi95/remm-slerp-l2-13b": { - "input_cost_per_token": 1.875e-6, + "input_cost_per_token": 0.000001875, "litellm_provider": "openrouter", "max_tokens": 6144, "mode": "chat", - "output_cost_per_token": 1.875e-6, + "output_cost_per_token": 0.000001875, "supports_tool_choice": true }, "openrouter/x-ai/grok-4": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://openrouter.ai/x-ai/grok-4", 
"supports_function_calling": true, "supports_reasoning": true, @@ -18920,13 +19708,13 @@ "supports_web_search": false }, "openrouter/z-ai/glm-4.6": { - "input_cost_per_token": 4.0e-7, + "input_cost_per_token": 4e-7, "litellm_provider": "openrouter", "max_input_tokens": 202800, "max_output_tokens": 131000, "max_tokens": 202800, "mode": "chat", - "output_cost_per_token": 1.75e-6, + "output_cost_per_token": 0.00000175, "source": "https://openrouter.ai/z-ai/glm-4.6", "supports_function_calling": true, "supports_reasoning": true, @@ -18939,7 +19727,7 @@ "max_output_tokens": 131000, "max_tokens": 202800, "mode": "chat", - "output_cost_per_token": 1.9e-6, + "output_cost_per_token": 0.0000019, "source": "https://openrouter.ai/z-ai/glm-4.6:exacto", "supports_function_calling": true, "supports_reasoning": true, @@ -19224,7 +20012,7 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.4e-6 + "output_cost_per_token": 0.0000014 }, "perplexity/codellama-70b-instruct": { "input_cost_per_token": 7e-7, @@ -19233,7 +20021,7 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 2.8e-6 + "output_cost_per_token": 0.0000028 }, "perplexity/llama-2-70b-chat": { "input_cost_per_token": 7e-7, @@ -19242,16 +20030,16 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.8e-6 + "output_cost_per_token": 0.0000028 }, "perplexity/llama-3.1-70b-instruct": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "perplexity/llama-3.1-8b-instruct": { "input_cost_per_token": 2e-7, @@ -19264,33 +20052,33 @@ }, "perplexity/llama-3.1-sonar-huge-128k-online": { "deprecation_date": "2025-02-22", - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, 
"litellm_provider": "perplexity", "max_input_tokens": 127072, "max_output_tokens": 127072, "max_tokens": 127072, "mode": "chat", - "output_cost_per_token": 5e-6 + "output_cost_per_token": 0.000005 }, "perplexity/llama-3.1-sonar-large-128k-chat": { "deprecation_date": "2025-02-22", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "perplexity/llama-3.1-sonar-large-128k-online": { "deprecation_date": "2025-02-22", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 127072, "max_output_tokens": 127072, "max_tokens": 127072, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "perplexity/llama-3.1-sonar-small-128k-chat": { "deprecation_date": "2025-02-22", @@ -19337,17 +20125,17 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.8e-6 + "output_cost_per_token": 0.0000028 }, "perplexity/pplx-70b-online": { "input_cost_per_request": 0.005, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "perplexity", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.8e-6 + "output_cost_per_token": 0.0000028 }, "perplexity/pplx-7b-chat": { "input_cost_per_token": 7e-8, @@ -19360,7 +20148,7 @@ }, "perplexity/pplx-7b-online": { "input_cost_per_request": 0.005, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "perplexity", "max_input_tokens": 4096, "max_output_tokens": 4096, @@ -19369,12 +20157,12 @@ "output_cost_per_token": 2.8e-7 }, "perplexity/sonar": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 128000, "max_tokens": 
128000, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "search_context_cost_per_query": { "search_context_size_high": 0.012, "search_context_size_low": 0.005, @@ -19383,14 +20171,14 @@ "supports_web_search": true }, "perplexity/sonar-deep-research": { - "citation_cost_per_token": 2e-6, - "input_cost_per_token": 2e-6, + "citation_cost_per_token": 0.000002, + "input_cost_per_token": 0.000002, "litellm_provider": "perplexity", "max_input_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_reasoning_token": 3e-6, - "output_cost_per_token": 8e-6, + "output_cost_per_reasoning_token": 0.000003, + "output_cost_per_token": 0.000008, "search_context_cost_per_query": { "search_context_size_high": 0.005, "search_context_size_low": 0.005, @@ -19406,7 +20194,7 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.8e-6 + "output_cost_per_token": 0.0000018 }, "perplexity/sonar-medium-online": { "input_cost_per_request": 0.005, @@ -19416,16 +20204,16 @@ "max_output_tokens": 12000, "max_tokens": 12000, "mode": "chat", - "output_cost_per_token": 1.8e-6 + "output_cost_per_token": 0.0000018 }, "perplexity/sonar-pro": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "perplexity", "max_input_tokens": 200000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.014, "search_context_size_low": 0.006, @@ -19434,12 +20222,12 @@ "supports_web_search": true }, "perplexity/sonar-reasoning": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "search_context_cost_per_query": { "search_context_size_high": 0.014, 
"search_context_size_low": 0.005, @@ -19449,12 +20237,12 @@ "supports_web_search": true }, "perplexity/sonar-reasoning-pro": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "perplexity", "max_input_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "search_context_cost_per_query": { "search_context_size_high": 0.014, "search_context_size_low": 0.006, @@ -19482,6 +20270,116 @@ "mode": "chat", "output_cost_per_token": 2.8e-7 }, + "publicai/swiss-ai/apertus-8b-instruct": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/swiss-ai/apertus-70b-instruct": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/aisingapore/Gemma-SEA-LION-v4-27B-IT": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/BSC-LT/salamandra-7b-instruct-tools-16k": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + 
"publicai/BSC-LT/ALIA-40b-instruct_Q8_0": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/allenai/Olmo-3-7B-Instruct": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/aisingapore/Qwen-SEA-LION-v4-32B-IT": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/allenai/Olmo-3-7B-Think": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "publicai/allenai/Olmo-3-32B-Think": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, "qwen.qwen3-coder-480b-a35b-v1:0": { "input_cost_per_token": 2.2e-7, "litellm_provider": "bedrock_converse", @@ -19489,7 +20387,7 @@ 
"max_output_tokens": 65536, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.8e-6, + "output_cost_per_token": 0.0000018, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true @@ -19513,7 +20411,7 @@ "max_output_tokens": 131072, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 6.0e-7, + "output_cost_per_token": 6e-7, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true @@ -19525,7 +20423,7 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 6.0e-7, + "output_cost_per_token": 6e-7, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true @@ -19571,7 +20469,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.75e-6, + "output_cost_per_token": 0.00000275, "supports_tool_choice": true }, "replicate/meta/llama-2-70b-chat": { @@ -19581,7 +20479,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.75e-6, + "output_cost_per_token": 0.00000275, "supports_tool_choice": true }, "replicate/meta/llama-2-7b": { @@ -19611,7 +20509,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.75e-6, + "output_cost_per_token": 0.00000275, "supports_tool_choice": true }, "replicate/meta/llama-3-70b-instruct": { @@ -19621,7 +20519,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.75e-6, + "output_cost_per_token": 0.00000275, "supports_tool_choice": true }, "replicate/meta/llama-3-8b": { @@ -19671,140 +20569,140 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "supports_tool_choice": true }, "rerank-english-v2.0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", 
"max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "rerank-english-v3.0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "rerank-multilingual-v2.0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "rerank-multilingual-v3.0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "rerank-v3.5": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "nvidia_nim/nvidia/nv-rerankqa-mistral-4b-v3": { - "input_cost_per_query": 0.0, - "input_cost_per_token": 0.0, + "input_cost_per_query": 0, + "input_cost_per_token": 0, "litellm_provider": "nvidia_nim", "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "nvidia_nim/nvidia/llama-3_2-nv-rerankqa-1b-v2": { - "input_cost_per_query": 0.0, - "input_cost_per_token": 0.0, + "input_cost_per_query": 0, + "input_cost_per_token": 0, "litellm_provider": "nvidia_nim", "mode": "rerank", - 
"output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-13b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-13b-f": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-70b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-70b-b-f": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-7b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-7b-f": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sambanova/DeepSeek-R1": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "sambanova", "max_input_tokens": 32768, 
"max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 7e-6, + "output_cost_per_token": 0.000007, "source": "https://cloud.sambanova.ai/plans/pricing" }, "sambanova/DeepSeek-R1-Distill-Llama-70B": { @@ -19814,17 +20712,17 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.4e-6, + "output_cost_per_token": 0.0000014, "source": "https://cloud.sambanova.ai/plans/pricing" }, "sambanova/DeepSeek-V3-0324": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "sambanova", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 4.5e-6, + "output_cost_per_token": 0.0000045, "source": "https://cloud.sambanova.ai/plans/pricing", "supports_function_calling": true, "supports_reasoning": true, @@ -19840,7 +20738,7 @@ "notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount" }, "mode": "chat", - "output_cost_per_token": 1.8e-6, + "output_cost_per_token": 0.0000018, "source": "https://cloud.sambanova.ai/plans/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -19864,13 +20762,13 @@ "supports_tool_choice": true }, "sambanova/Meta-Llama-3.1-405B-Instruct": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "sambanova", "max_input_tokens": 16384, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "source": "https://cloud.sambanova.ai/plans/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -19916,7 +20814,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://cloud.sambanova.ai/plans/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -19939,7 
+20837,7 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "source": "https://cloud.sambanova.ai/plans/pricing" }, "sambanova/Qwen2-Audio-7B-Instruct": { @@ -19970,8 +20868,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 3e-6, - "output_cost_per_token": 4.5e-6, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.0000045, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -19983,8 +20881,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-6, - "output_cost_per_token": 4.5e-6, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.0000045, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -19992,7 +20890,6 @@ "supports_reasoning": true, "source": "https://cloud.sambanova.ai/plans/pricing" }, - "snowflake/claude-3-5-sonnet": { "litellm_provider": "snowflake", "max_input_tokens": 18000, @@ -20209,19 +21106,19 @@ "input_cost_per_pixel": 3.81469e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "standard/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 4.359e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "standard/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 4.359e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "tavily/search": { "input_cost_per_query": 0.008, @@ -20288,23 +21185,23 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-completion-codestral/codestral-2405": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "text-completion-codestral", 
"max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://docs.mistral.ai/capabilities/code_generation/" }, "text-completion-codestral/codestral-latest": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "text-completion-codestral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://docs.mistral.ai/capabilities/code_generation/" }, "text-embedding-004": { @@ -20336,8 +21233,8 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0, - "output_cost_per_token_batches": 0.0, + "output_cost_per_token": 0, + "output_cost_per_token_batches": 0, "output_vector_size": 3072 }, "text-embedding-3-small": { @@ -20347,8 +21244,8 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0, - "output_cost_per_token_batches": 0.0, + "output_cost_per_token": 0, + "output_cost_per_token_batches": 0, "output_vector_size": 1536 }, "text-embedding-ada-002": { @@ -20357,7 +21254,7 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536 }, "text-embedding-ada-002-v2": { @@ -20367,8 +21264,8 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0, - "output_cost_per_token_batches": 0.0 + "output_cost_per_token": 0, + "output_cost_per_token_batches": 0 }, "text-embedding-large-exp-03-07": { "input_cost_per_character": 2.5e-8, @@ -20393,31 +21290,31 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "text-moderation-007": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, 
"max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "text-moderation-latest": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "text-moderation-stable": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "text-multilingual-embedding-002": { "input_cost_per_character": 2.5e-8, @@ -20441,23 +21338,23 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-unicorn": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "vertex_ai-text-models", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "completion", - "output_cost_per_token": 2.8e-5, + "output_cost_per_token": 0.000028, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-unicorn@001": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "vertex_ai-text-models", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "completion", - "output_cost_per_token": 2.8e-5, + "output_cost_per_token": 0.000028, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "textembedding-gecko": { @@ -20541,29 +21438,29 @@ "output_cost_per_token": 3e-7 }, "together-ai-81.1b-110b": { - "input_cost_per_token": 1.8e-6, + "input_cost_per_token": 0.0000018, "litellm_provider": "together_ai", "mode": "chat", - "output_cost_per_token": 1.8e-6 + "output_cost_per_token": 0.0000018 }, "together-ai-embedding-151m-to-350m": 
{ "input_cost_per_token": 1.6e-8, "litellm_provider": "together_ai", "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "together-ai-embedding-up-to-150m": { "input_cost_per_token": 8e-9, "litellm_provider": "together_ai", "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "together_ai/baai/bge-base-en-v1.5": { "input_cost_per_token": 8e-9, "litellm_provider": "together_ai", "max_input_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 768 }, "together_ai/BAAI/bge-base-en-v1.5": { @@ -20571,7 +21468,7 @@ "litellm_provider": "together_ai", "max_input_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 768 }, "together-ai-up-to-4b": { @@ -20599,7 +21496,7 @@ "litellm_provider": "together_ai", "max_input_tokens": 262000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "source": "https://www.together.ai/models/qwen3-235b-a22b-instruct-2507-fp8", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20610,7 +21507,7 @@ "litellm_provider": "together_ai", "max_input_tokens": 256000, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://www.together.ai/models/qwen3-235b-a22b-thinking-2507", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20628,24 +21525,24 @@ "supports_tool_choice": false }, "together_ai/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "together_ai", "max_input_tokens": 256000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://www.together.ai/models/qwen3-coder-480b-a35b-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, 
"supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-R1": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "together_ai", "max_input_tokens": 128000, "max_output_tokens": 20480, "max_tokens": 20480, "mode": "chat", - "output_cost_per_token": 7e-6, + "output_cost_per_token": 0.000007, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -20655,20 +21552,20 @@ "litellm_provider": "together_ai", "max_input_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "source": "https://www.together.ai/models/deepseek-r1-0528-throughput", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-V3": { - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "together_ai", "max_input_tokens": 65536, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -20678,7 +21575,7 @@ "litellm_provider": "together_ai", "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.7e-6, + "output_cost_per_token": 0.0000017, "source": "https://www.together.ai/models/deepseek-v3-1", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20731,10 +21628,10 @@ "supports_tool_choice": true }, "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { - "input_cost_per_token": 3.5e-6, + "input_cost_per_token": 0.0000035, "litellm_provider": "together_ai", "mode": "chat", - "output_cost_per_token": 3.5e-6, + "output_cost_per_token": 0.0000035, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -20785,10 +21682,10 @@ 
"supports_tool_choice": true }, "together_ai/moonshotai/Kimi-K2-Instruct": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "together_ai", "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://www.together.ai/models/kimi-k2-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20828,20 +21725,20 @@ "litellm_provider": "together_ai", "max_input_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-6, + "output_cost_per_token": 0.0000011, "source": "https://www.together.ai/models/glm-4-5-air", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "together_ai/zai-org/GLM-4.6": { - "input_cost_per_token": 0.6e-6, + "input_cost_per_token": 6e-7, "litellm_provider": "together_ai", "max_input_tokens": 200000, "max_output_tokens": 200000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 2.2e-6, + "output_cost_per_token": 0.0000022, "source": "https://www.together.ai/models/glm-4-6", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20849,11 +21746,11 @@ "supports_tool_choice": true }, "together_ai/moonshotai/Kimi-K2-Instruct-0905": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "together_ai", "max_input_tokens": 262144, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://www.together.ai/models/kimi-k2-0905", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20864,7 +21761,7 @@ "litellm_provider": "together_ai", "max_input_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://www.together.ai/models/qwen3-next-80b-a3b-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20875,20 +21772,20 @@ 
"litellm_provider": "together_ai", "max_input_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://www.together.ai/models/qwen3-next-80b-a3b-thinking", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "tts-1": { - "input_cost_per_character": 1.5e-5, + "input_cost_per_character": 0.000015, "litellm_provider": "openai", "mode": "audio_speech", "supported_endpoints": ["/v1/audio/speech"] }, "tts-1-hd": { - "input_cost_per_character": 3e-5, + "input_cost_per_character": 0.00003, "litellm_provider": "openai", "mode": "audio_speech", "supported_endpoints": ["/v1/audio/speech"] @@ -20920,13 +21817,13 @@ "supports_response_schema": true }, "us.amazon.nova-premier-v1:0": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 1.25e-5, + "output_cost_per_token": 0.0000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": false, @@ -20940,7 +21837,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.2e-6, + "output_cost_per_token": 0.0000032, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -20948,7 +21845,7 @@ "supports_vision": true }, "us.anthropic.claude-3-5-haiku-20241022-v1:0": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 8e-7, "litellm_provider": "bedrock", @@ -20956,7 +21853,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -20965,15 
+21862,15 @@ "supports_tool_choice": true }, "us.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -20987,13 +21884,13 @@ "tool_use_system_prompt_tokens": 346 }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -21001,15 +21898,15 @@ "supports_vision": true }, "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -21020,15 +21917,15 @@ "supports_vision": true }, "us.anthropic.claude-3-7-sonnet-20250219-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - 
"input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -21046,7 +21943,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -21054,26 +21951,26 @@ "supports_vision": true }, "us.anthropic.claude-3-opus-20240229-v1:0": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true }, "us.anthropic.claude-3-sonnet-20240229-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -21081,15 +21978,15 @@ "supports_vision": true }, "us.anthropic.claude-opus-4-1-20250805-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, 
"mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -21107,19 +22004,19 @@ "tool_use_system_prompt_tokens": 159 }, "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 4.125e-6, + "cache_creation_input_token_cost": 0.000004125, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 3.3e-6, - "input_cost_per_token_above_200k_tokens": 6.6e-6, - "output_cost_per_token_above_200k_tokens": 2.475e-5, - "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -21137,15 +22034,15 @@ "tool_use_system_prompt_tokens": 346 }, "au.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -21158,15 +22055,67 @@ "tool_use_system_prompt_tokens": 346 }, "us.anthropic.claude-opus-4-20250514-v1:0": { - 
"cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "global.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": 
"bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -21184,19 +22133,19 @@ "tool_use_system_prompt_tokens": 159 }, "us.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -21214,25 +22163,25 @@ "tool_use_system_prompt_tokens": 159 }, "us.deepseek.r1-v1:0": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "supports_function_calling": false, "supports_reasoning": true, "supports_tool_choice": false }, "us.meta.llama3-1-405b-instruct-v1:0": { - "input_cost_per_token": 5.32e-6, + "input_cost_per_token": 0.00000532, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - 
"output_cost_per_token": 1.6e-5, + "output_cost_per_token": 0.000016, "supports_function_calling": true, "supports_tool_choice": false }, @@ -21293,13 +22242,13 @@ "supports_tool_choice": false }, "us.meta.llama3-2-90b-instruct-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": false, "supports_vision": true @@ -21346,24 +22295,24 @@ "supports_tool_choice": false }, "us.mistral.pixtral-large-2502-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": false }, "v0/v0-1.0-md": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "v0", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, @@ -21371,13 +22320,13 @@ "supports_vision": true }, "v0/v0-1.5-lg": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "v0", "max_input_tokens": 512000, "max_output_tokens": 512000, "max_tokens": 512000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, @@ -21385,13 +22334,13 @@ "supports_vision": true }, "v0/v0-1.5-md": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 
0.000003, "litellm_provider": "v0", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, @@ -21441,7 +22390,7 @@ "max_output_tokens": 66536, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.6e-6 + "output_cost_per_token": 0.0000016 }, "vercel_ai_gateway/amazon/nova-lite": { "input_cost_per_token": 6e-8, @@ -21468,7 +22417,7 @@ "max_output_tokens": 8192, "max_tokens": 300000, "mode": "chat", - "output_cost_per_token": 3.2e-6 + "output_cost_per_token": 0.0000032 }, "vercel_ai_gateway/amazon/titan-embed-text-v2": { "input_cost_per_token": 2e-8, @@ -21477,7 +22426,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/anthropic/claude-3-haiku": { "cache_creation_input_token_cost": 3e-7, @@ -21488,21 +22437,21 @@ "max_output_tokens": 4096, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.25e-6 + "output_cost_per_token": 0.00000125 }, "vercel_ai_gateway/anthropic/claude-3-opus": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 7.5e-5 + "output_cost_per_token": 0.000075 }, "vercel_ai_gateway/anthropic/claude-3.5-haiku": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 8e-7, "litellm_provider": "vercel_ai_gateway", @@ -21510,60 +22459,60 @@ "max_output_tokens": 8192, 
"max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 4e-6 + "output_cost_per_token": 0.000004 }, "vercel_ai_gateway/anthropic/claude-3.5-sonnet": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/anthropic/claude-3.7-sonnet": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/anthropic/claude-4-opus": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 7.5e-5 + "output_cost_per_token": 0.000075 }, "vercel_ai_gateway/anthropic/claude-4-sonnet": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, 
"vercel_ai_gateway/cohere/command-a": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 256000, "max_output_tokens": 8000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/cohere/command-r": { "input_cost_per_token": 1.5e-7, @@ -21575,13 +22524,13 @@ "output_cost_per_token": 6e-7 }, "vercel_ai_gateway/cohere/command-r-plus": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/cohere/embed-v4.0": { "input_cost_per_token": 1.2e-7, @@ -21590,7 +22539,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/deepseek/deepseek-r1": { "input_cost_per_token": 5.5e-7, @@ -21599,7 +22548,7 @@ "max_output_tokens": 8192, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.19e-6 + "output_cost_per_token": 0.00000219 }, "vercel_ai_gateway/deepseek/deepseek-r1-distill-llama-70b": { "input_cost_per_token": 7.5e-7, @@ -21644,16 +22593,16 @@ "max_output_tokens": 65536, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 2.5e-6 + "output_cost_per_token": 0.0000025 }, "vercel_ai_gateway/google/gemini-2.5-pro": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, "max_output_tokens": 65536, "max_tokens": 1048576, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/google/gemini-embedding-001": { "input_cost_per_token": 1.5e-7, @@ -21662,7 +22611,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - 
"output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/google/gemma-2-9b": { "input_cost_per_token": 2e-7, @@ -21680,7 +22629,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/google/text-multilingual-embedding-002": { "input_cost_per_token": 2.5e-8, @@ -21689,7 +22638,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/inception/mercury-coder-small": { "input_cost_per_token": 2.5e-7, @@ -21698,7 +22647,7 @@ "max_output_tokens": 16384, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "vercel_ai_gateway/meta/llama-3-70b": { "input_cost_per_token": 5.9e-7, @@ -21815,7 +22764,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/mistral/devstral-small": { "input_cost_per_token": 7e-8, @@ -21827,13 +22776,13 @@ "output_cost_per_token": 2.8e-7 }, "vercel_ai_gateway/mistral/magistral-medium": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 64000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 5e-6 + "output_cost_per_token": 0.000005 }, "vercel_ai_gateway/mistral/magistral-small": { "input_cost_per_token": 5e-7, @@ -21842,7 +22791,7 @@ "max_output_tokens": 64000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "vercel_ai_gateway/mistral/ministral-3b": { "input_cost_per_token": 4e-8, @@ -21869,16 +22818,16 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/mistral/mistral-large": { - "input_cost_per_token": 2e-6, + 
"input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32000, "max_output_tokens": 4000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 6e-6 + "output_cost_per_token": 0.000006 }, "vercel_ai_gateway/mistral/mistral-saba-24b": { "input_cost_per_token": 7.9e-7, @@ -21899,13 +22848,13 @@ "output_cost_per_token": 3e-7 }, "vercel_ai_gateway/mistral/mixtral-8x22b-instruct": { - "input_cost_per_token": 1.2e-6, + "input_cost_per_token": 0.0000012, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 65536, "max_output_tokens": 2048, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-6 + "output_cost_per_token": 0.0000012 }, "vercel_ai_gateway/mistral/pixtral-12b": { "input_cost_per_token": 1.5e-7, @@ -21917,13 +22866,13 @@ "output_cost_per_token": 1.5e-7 }, "vercel_ai_gateway/mistral/pixtral-large": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6 + "output_cost_per_token": 0.000006 }, "vercel_ai_gateway/moonshotai/kimi-k2": { "input_cost_per_token": 5.5e-7, @@ -21932,7 +22881,7 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.2e-6 + "output_cost_per_token": 0.0000022 }, "vercel_ai_gateway/morph/morph-v3-fast": { "input_cost_per_token": 8e-7, @@ -21941,7 +22890,7 @@ "max_output_tokens": 16384, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.2e-6 + "output_cost_per_token": 0.0000012 }, "vercel_ai_gateway/morph/morph-v3-large": { "input_cost_per_token": 9e-7, @@ -21950,7 +22899,7 @@ "max_output_tokens": 16384, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.9e-6 + "output_cost_per_token": 0.0000019 }, "vercel_ai_gateway/openai/gpt-3.5-turbo": { "input_cost_per_token": 5e-7, @@ -21959,39 +22908,39 @@ "max_output_tokens": 4096, 
"max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "vercel_ai_gateway/openai/gpt-3.5-turbo-instruct": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "vercel_ai_gateway/openai/gpt-4-turbo": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 3e-5 + "output_cost_per_token": 0.00003 }, "vercel_ai_gateway/openai/gpt-4.1": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 1047576, "mode": "chat", - "output_cost_per_token": 8e-6 + "output_cost_per_token": 0.000008 }, "vercel_ai_gateway/openai/gpt-4.1-mini": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 4e-7, "litellm_provider": "vercel_ai_gateway", @@ -21999,10 +22948,10 @@ "max_output_tokens": 32768, "max_tokens": 1047576, "mode": "chat", - "output_cost_per_token": 1.6e-6 + "output_cost_per_token": 0.0000016 }, "vercel_ai_gateway/openai/gpt-4.1-nano": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 2.5e-8, "input_cost_per_token": 1e-7, "litellm_provider": "vercel_ai_gateway", @@ -22013,18 +22962,18 @@ "output_cost_per_token": 4e-7 }, "vercel_ai_gateway/openai/gpt-4o": { - "cache_creation_input_token_cost": 0.0, - "cache_read_input_token_cost": 1.25e-6, - 
"input_cost_per_token": 2.5e-6, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/openai/gpt-4o-mini": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 7.5e-8, "input_cost_per_token": 1.5e-7, "litellm_provider": "vercel_ai_gateway", @@ -22035,48 +22984,48 @@ "output_cost_per_token": 6e-7 }, "vercel_ai_gateway/openai/o1": { - "cache_creation_input_token_cost": 0.0, - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 6e-5 + "output_cost_per_token": 0.00006 }, "vercel_ai_gateway/openai/o3": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 8e-6 + "output_cost_per_token": 0.000008 }, "vercel_ai_gateway/openai/o3-mini": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 4.4e-6 + "output_cost_per_token": 0.0000044 }, 
"vercel_ai_gateway/openai/o4-mini": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 2.75e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 4.4e-6 + "output_cost_per_token": 0.0000044 }, "vercel_ai_gateway/openai/text-embedding-3-large": { "input_cost_per_token": 1.3e-7, @@ -22085,7 +23034,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/openai/text-embedding-3-small": { "input_cost_per_token": 2e-8, @@ -22094,7 +23043,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/openai/text-embedding-ada-002": { "input_cost_per_token": 1e-7, @@ -22103,97 +23052,97 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/perplexity/sonar": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, "max_tokens": 127000, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "vercel_ai_gateway/perplexity/sonar-pro": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 8000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/perplexity/sonar-reasoning": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, 
"max_tokens": 127000, "mode": "chat", - "output_cost_per_token": 5e-6 + "output_cost_per_token": 0.000005 }, "vercel_ai_gateway/perplexity/sonar-reasoning-pro": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, "max_tokens": 127000, "mode": "chat", - "output_cost_per_token": 8e-6 + "output_cost_per_token": 0.000008 }, "vercel_ai_gateway/vercel/v0-1.0-md": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 32000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/vercel/v0-1.5-md": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/xai/grok-2": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 4000, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/xai/grok-2-vision": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/xai/grok-3": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, 
"vercel_ai_gateway/xai/grok-3-fast": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-5 + "output_cost_per_token": 0.000025 }, "vercel_ai_gateway/xai/grok-3-mini": { "input_cost_per_token": 3e-7, @@ -22211,16 +23160,16 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6 + "output_cost_per_token": 0.000004 }, "vercel_ai_gateway/xai/grok-4": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/zai/glm-4.5": { "input_cost_per_token": 6e-7, @@ -22229,7 +23178,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.2e-6 + "output_cost_per_token": 0.0000022 }, "vercel_ai_gateway/zai/glm-4.5-air": { "input_cost_per_token": 2e-7, @@ -22238,7 +23187,7 @@ "max_output_tokens": 96000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-6 + "output_cost_per_token": 0.0000011 }, "vercel_ai_gateway/zai/glm-4.6": { "litellm_provider": "vercel_ai_gateway", @@ -22248,48 +23197,55 @@ "max_output_tokens": 200000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.8e-6, + "output_cost_per_token": 0.0000018, "source": "https://vercel.com/ai-gateway/models/glm-4.6", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, + "vertex_ai/chirp": { + "input_cost_per_character": 0.00003, + "litellm_provider": "vertex_ai", + "mode": "audio_speech", + "source": "https://cloud.google.com/text-to-speech/pricing", + "supported_endpoints": ["/v1/audio/speech"] + }, 
"vertex_ai/claude-3-5-haiku": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_tool_choice": true }, "vertex_ai/claude-3-5-haiku@20241022": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_tool_choice": true }, "vertex_ai/claude-haiku-4-5@20251001": { - "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost": 0.00000125, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/haiku-4-5", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -22300,13 +23256,13 @@ "supports_tool_choice": true }, "vertex_ai/claude-3-5-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ 
-22315,13 +23271,13 @@ "supports_vision": true }, "vertex_ai/claude-3-5-sonnet-v2": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22330,13 +23286,13 @@ "supports_vision": true }, "vertex_ai/claude-3-5-sonnet-v2@20241022": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22345,13 +23301,13 @@ "supports_vision": true }, "vertex_ai/claude-3-5-sonnet@20240620": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -22359,16 +23315,16 @@ "supports_vision": true }, "vertex_ai/claude-3-7-sonnet@20250219": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-06-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": 
true, "supports_computer_use": true, "supports_function_calling": true, @@ -22387,7 +23343,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, @@ -22400,74 +23356,74 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-3-opus": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-3-opus@20240229": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-3-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-3-sonnet@20240229": { - 
"input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-opus-4": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -22485,55 +23441,107 @@ "tool_use_system_prompt_tokens": 159 }, "vertex_ai/claude-opus-4-1": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, - "input_cost_per_token_batches": 7.5e-6, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, - "output_cost_per_token_batches": 3.75e-5, + "output_cost_per_token": 0.000075, + "output_cost_per_token_batches": 0.0000375, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-opus-4-1@20250805": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 
1.5e-6, - "input_cost_per_token": 1.5e-5, - "input_cost_per_token_batches": 7.5e-6, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, - "output_cost_per_token_batches": 3.75e-5, + "output_cost_per_token": 0.000075, + "output_cost_per_token_batches": 0.0000375, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, + "vertex_ai/claude-opus-4-5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-opus-4-5@20251101": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + 
"search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "vertex_ai/claude-sonnet-4-5": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token_batches": 1.5e-6, + "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_batches": 7.5e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_batches": 0.0000075, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22545,21 +23553,21 @@ "supports_vision": true }, "vertex_ai/claude-sonnet-4-5@20250929": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + 
"input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token_batches": 1.5e-6, + "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_batches": 7.5e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_batches": 0.0000075, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22571,15 +23579,15 @@ "supports_vision": true }, "vertex_ai/claude-opus-4@20250514": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -22597,19 +23605,19 @@ "tool_use_system_prompt_tokens": 159 }, "vertex_ai/claude-sonnet-4": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + 
"cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -22627,19 +23635,19 @@ "tool_use_system_prompt_tokens": 159 }, "vertex_ai/claude-sonnet-4@20250514": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -22734,13 +23742,13 @@ "supports_tool_choice": true }, "vertex_ai/deepseek-ai/deepseek-v3.1-maas": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "vertex_ai-deepseek_models", "max_input_tokens": 163840, "max_output_tokens": 32768, "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supported_regions": ["us-west2"], 
"supports_assistant_prefill": true, @@ -22750,13 +23758,13 @@ "supports_tool_choice": true }, "vertex_ai/deepseek-ai/deepseek-r1-0528-maas": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "vertex_ai-deepseek_models", "max_input_tokens": 65336, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -22766,7 +23774,7 @@ }, "vertex_ai/gemini-2.5-flash-image": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -22780,8 +23788,8 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation#edit-an-image", "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], @@ -22802,16 +23810,17 @@ }, "vertex_ai/gemini-3-pro-image-preview": { "input_cost_per_image": 0.0011, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 65536, "max_output_tokens": 32768, "max_tokens": 65536, "mode": "image_generation", "output_cost_per_image": 0.134, - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000006, 
"source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image" }, "vertex_ai/imagegeneration@006": { @@ -22873,23 +23882,23 @@ "supports_tool_choice": true }, "vertex_ai/jamba-1.5-large": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-ai21_models", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "vertex_ai/jamba-1.5-large@001": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-ai21_models", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "vertex_ai/jamba-1.5-mini": { @@ -22913,33 +23922,33 @@ "supports_tool_choice": true }, "vertex_ai/meta/llama-3.1-405b-instruct-maas": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.6e-5, + "output_cost_per_token": 0.000016, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/meta/llama-3.1-70b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", "supports_system_messages": true, "supports_tool_choice": true, 
"supports_vision": true }, "vertex_ai/meta/llama-3.1-8b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, @@ -22948,14 +23957,14 @@ "notes": "VertexAI states that The Llama 3.1 API service for llama-3.1-70b-instruct-maas and llama-3.1-8b-instruct-maas are in public preview and at no cost." }, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, @@ -22964,7 +23973,7 @@ "notes": "VertexAI states that The Llama 3.2 API service is at no cost during public preview, and will be priced as per dollar-per-1M-tokens at GA." 
}, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", "supports_system_messages": true, "supports_tool_choice": true, @@ -22977,7 +23986,7 @@ "max_output_tokens": 1000000, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.15e-6, + "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "code"], @@ -22991,7 +24000,7 @@ "max_output_tokens": 1000000, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.15e-6, + "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supported_modalities": ["text", "image"], "supported_output_modalities": ["text", "code"], @@ -23027,35 +24036,35 @@ "supports_tool_choice": true }, "vertex_ai/meta/llama3-405b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 32000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_tool_choice": true }, "vertex_ai/meta/llama3-70b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 32000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_tool_choice": true }, "vertex_ai/meta/llama3-8b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": 
"vertex_ai-llama_models", "max_input_tokens": 32000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_tool_choice": true }, @@ -23066,7 +24075,7 @@ "max_output_tokens": 196608, "max_tokens": 196608, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_function_calling": true, "supports_tool_choice": true @@ -23078,7 +24087,7 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_function_calling": true, "supports_tool_choice": true, @@ -23091,7 +24100,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": true }, @@ -23102,7 +24111,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": true }, @@ -23113,7 +24122,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": true }, @@ -23124,62 +24133,62 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-large-2411": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, 
"max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-large@2407": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-large@2411-001": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-large@latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-nemo@2407": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true }, @@ -23195,32 +24204,32 @@ "supports_tool_choice": true }, "vertex_ai/mistral-small-2503": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 
128000, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/mistral-small-2503@001": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-ocr-2505": { "litellm_provider": "vertex_ai", "mode": "ocr", - "ocr_cost_per_page": 5e-4, + "ocr_cost_per_page": 0.0005, "supported_endpoints": ["/v1/ocr"], "source": "https://cloud.google.com/generative-ai-app-builder/pricing" }, @@ -23253,19 +24262,19 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-qwen_models", "max_input_tokens": 262144, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -23277,7 +24286,7 @@ "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -23289,7 +24298,7 @@ "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 
1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -23373,7 +24382,7 @@ "max_query_tokens": 16000, "max_tokens": 16000, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/rerank-2-lite": { "input_cost_per_query": 2e-8, @@ -23384,7 +24393,7 @@ "max_query_tokens": 8000, "max_tokens": 8000, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-2": { "input_cost_per_token": 1e-7, @@ -23392,7 +24401,7 @@ "max_input_tokens": 4000, "max_tokens": 4000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3": { "input_cost_per_token": 6e-8, @@ -23400,7 +24409,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3-large": { "input_cost_per_token": 1.8e-7, @@ -23408,7 +24417,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3-lite": { "input_cost_per_token": 2e-8, @@ -23416,7 +24425,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3.5": { "input_cost_per_token": 6e-8, @@ -23424,7 +24433,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3.5-lite": { "input_cost_per_token": 2e-8, @@ -23432,7 +24441,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-code-2": { "input_cost_per_token": 1.2e-7, @@ -23440,7 +24449,7 @@ "max_input_tokens": 16000, "max_tokens": 16000, "mode": "embedding", - "output_cost_per_token": 0.0 + 
"output_cost_per_token": 0 }, "voyage/voyage-code-3": { "input_cost_per_token": 1.8e-7, @@ -23448,7 +24457,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-context-3": { "input_cost_per_token": 1.8e-7, @@ -23456,7 +24465,7 @@ "max_input_tokens": 120000, "max_tokens": 120000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-finance-2": { "input_cost_per_token": 1.2e-7, @@ -23464,7 +24473,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-large-2": { "input_cost_per_token": 1.2e-7, @@ -23472,7 +24481,7 @@ "max_input_tokens": 16000, "max_tokens": 16000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-law-2": { "input_cost_per_token": 1.2e-7, @@ -23480,7 +24489,7 @@ "max_input_tokens": 16000, "max_tokens": 16000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-lite-01": { "input_cost_per_token": 1e-7, @@ -23488,7 +24497,7 @@ "max_input_tokens": 4096, "max_tokens": 4096, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-lite-02-instruct": { "input_cost_per_token": 1e-7, @@ -23496,7 +24505,7 @@ "max_input_tokens": 4000, "max_tokens": 4000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-multimodal-3": { "input_cost_per_token": 1.2e-7, @@ -23504,7 +24513,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "wandb/openai/gpt-oss-120b": { "max_tokens": 131072, @@ -23564,8 +24573,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.135, - "output_cost_per_token": 0.4, + 
"input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000025, "litellm_provider": "wandb", "mode": "chat" }, @@ -23633,13 +24642,13 @@ "mode": "chat" }, "watsonx/ibm/granite-3-8b-instruct": { - "input_cost_per_token": 0.2e-6, + "input_cost_per_token": 2e-7, "litellm_provider": "watsonx", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.2e-6, + "output_cost_per_token": 2e-7, "supports_audio_input": false, "supports_audio_output": false, "supports_function_calling": true, @@ -23651,13 +24660,13 @@ "supports_vision": false }, "watsonx/mistralai/mistral-large": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "watsonx", "max_input_tokens": 131072, "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 10e-6, + "output_cost_per_token": 0.00001, "supports_audio_input": false, "supports_audio_output": false, "supports_function_calling": true, @@ -23696,8 +24705,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.6e-6, - "output_cost_per_token": 0.6e-6, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23708,8 +24717,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.6e-6, - "output_cost_per_token": 0.6e-6, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23720,8 +24729,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.6e-6, - "output_cost_per_token": 0.6e-6, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23732,8 +24741,8 @@ "max_tokens": 8192, 
"max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.2e-6, - "output_cost_per_token": 0.2e-6, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23744,8 +24753,8 @@ "max_tokens": 20480, "max_input_tokens": 20480, "max_output_tokens": 20480, - "input_cost_per_token": 0.06e-6, - "output_cost_per_token": 0.25e-6, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23756,8 +24765,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.1e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23768,8 +24777,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.2e-6, - "output_cost_per_token": 0.2e-6, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23780,8 +24789,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.38e-6, - "output_cost_per_token": 0.38e-6, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23792,8 +24801,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.38e-6, - "output_cost_per_token": 0.38e-6, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23804,8 +24813,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.38e-6, - 
"output_cost_per_token": 0.38e-6, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23816,8 +24825,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.1e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23828,8 +24837,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.35e-6, - "output_cost_per_token": 0.35e-6, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23840,8 +24849,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.1e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23852,8 +24861,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.15e-6, - "output_cost_per_token": 0.15e-6, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23864,8 +24873,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-6, - "output_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23876,8 +24885,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.71e-6, - "output_cost_per_token": 0.71e-6, + "input_cost_per_token": 7.1e-7, 
+ "output_cost_per_token": 7.1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23888,8 +24897,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.35e-6, - "output_cost_per_token": 1.4e-6, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 0.0000014, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23900,8 +24909,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.35e-6, - "output_cost_per_token": 0.35e-6, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23912,8 +24921,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-6, - "output_cost_per_token": 10e-6, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.00001, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23924,8 +24933,8 @@ "max_tokens": 32000, "max_input_tokens": 32000, "max_output_tokens": 32000, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.3e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23936,8 +24945,8 @@ "max_tokens": 32000, "max_input_tokens": 32000, "max_output_tokens": 32000, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.3e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23948,8 +24957,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.35e-6, - "output_cost_per_token": 0.35e-6, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", 
"mode": "chat", "supports_function_calling": false, @@ -23960,8 +24969,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.15e-6, - "output_cost_per_token": 0.6e-6, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23972,14 +24981,21 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1.8e-6, - "output_cost_per_token": 1.8e-6, + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.0000018, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, "supports_parallel_function_calling": false, "supports_vision": false }, + "watsonx/whisper-large-v3-turbo": { + "input_cost_per_second": 0.0001, + "output_cost_per_second": 0.0001, + "litellm_provider": "watsonx", + "mode": "audio_transcription", + "supported_endpoints": ["/v1/audio/transcriptions"] + }, "whisper-1": { "input_cost_per_second": 0.0001, "litellm_provider": "openai", @@ -23988,91 +25004,91 @@ "supported_endpoints": ["/v1/audio/transcriptions"] }, "xai/grok-2": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-2-1212": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-2-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, 
"litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-2-vision": { - "input_cost_per_image": 2e-6, - "input_cost_per_token": 2e-6, + "input_cost_per_image": 0.000002, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true }, "xai/grok-2-vision-1212": { - "input_cost_per_image": 2e-6, - "input_cost_per_token": 2e-6, + "input_cost_per_image": 0.000002, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true }, "xai/grok-2-vision-latest": { - "input_cost_per_image": 2e-6, - "input_cost_per_token": 2e-6, + "input_cost_per_image": 0.000002, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true }, "xai/grok-3": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": 
"https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24080,13 +25096,13 @@ "supports_web_search": true }, "xai/grok-3-beta": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24094,13 +25110,13 @@ "supports_web_search": true }, "xai/grok-3-fast-beta": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-5, + "output_cost_per_token": 0.000025, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24108,13 +25124,13 @@ "supports_web_search": true }, "xai/grok-3-fast-latest": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-5, + "output_cost_per_token": 0.000025, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24122,13 +25138,13 @@ "supports_web_search": true }, "xai/grok-3-latest": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24172,7 +25188,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - 
"output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_reasoning": true, @@ -24187,7 +25203,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_reasoning": true, @@ -24202,7 +25218,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_reasoning": true, @@ -24226,13 +25242,13 @@ "supports_web_search": true }, "xai/grok-4": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, @@ -24240,15 +25256,15 @@ }, "xai/grok-4-fast-reasoning": { "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, - "mode": "chat", - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, - "cache_read_input_token_cost": 0.05e-6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, + "cache_read_input_token_cost": 5e-8, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, @@ -24256,61 +25272,61 @@ }, 
"xai/grok-4-fast-non-reasoning": { "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "cache_read_input_token_cost": 0.05e-6, - "max_tokens": 2e6, - "mode": "chat", - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "cache_read_input_token_cost": 5e-8, + "max_tokens": 2000000, + "mode": "chat", + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-4-0709": { - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_128k_tokens": 6e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_128k_tokens": 0.000006, "litellm_provider": "xai", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_above_128k_tokens": 30e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_128k_tokens": 0.00003, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-4-latest": { - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_128k_tokens": 6e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_128k_tokens": 0.000006, "litellm_provider": "xai", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_above_128k_tokens": 30e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_128k_tokens": 0.00003, "source": "https://docs.x.ai/docs/models", 
"supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-4-1-fast": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24321,16 +25337,16 @@ "supports_web_search": true }, "xai/grok-4-1-fast-reasoning": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24341,16 +25357,16 @@ "supports_web_search": true }, "xai/grok-4-1-fast-reasoning-latest": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + 
"input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24361,16 +25377,16 @@ "supports_web_search": true }, "xai/grok-4-1-fast-non-reasoning": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24380,16 +25396,16 @@ "supports_web_search": true }, "xai/grok-4-1-fast-non-reasoning-latest": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, 
"mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24399,13 +25415,13 @@ "supports_web_search": true }, "xai/grok-beta": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, @@ -24419,7 +25435,7 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_reasoning": true, @@ -24433,7 +25449,7 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_reasoning": true, @@ -24447,28 +25463,117 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true }, "xai/grok-vision-beta": { - "input_cost_per_image": 5e-6, - "input_cost_per_token": 5e-6, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, 
"supports_tool_choice": true, "supports_vision": true, "supports_web_search": true }, + "zai/glm-4.6": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000022, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000022, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5v": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000018, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-x": { + "input_cost_per_token": 0.0000022, + "output_cost_per_token": 0.0000089, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-air": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000011, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-airx": { + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000045, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, 
+ "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4-32b-0414-128k": { + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-flash": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, "vertex_ai/search_api": { - "input_cost_per_query": 1.5e-3, + "input_cost_per_query": 0.0015, "litellm_provider": "vertex_ai", "mode": "vector_store" },