From 5d304665d21c6553c1dba411f48e4a612c8caaba Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Fri, 5 Dec 2025 19:24:45 +0100 Subject: [PATCH 1/7] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20GPT-5.1-Codex-?= =?UTF-8?q?Max=20model=20with=20xhigh=20reasoning=20level?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for OpenAI's gpt-5.1-codex-max model which introduces a new 'xhigh' (Extra High) reasoning effort level for extended deep thinking. Changes: - Extended ThinkingLevel type to include 'xhigh' as 5th option - Added GPT_CODEX_MAX to known models with codex-max alias - Updated thinking policy to return 5 levels for codex-max only - Added xhigh to UI level descriptions and command palette - Added model pricing/capability data to models-extra - Updated SendMessageOptionsSchema to accept xhigh - Added comprehensive tests for codex-max policy The xhigh level is exclusive to gpt-5.1-codex-max. Other models gracefully fall back to a level they support when xhigh is requested: medium for models with the standard four levels, or the only supported level for single-level models such as gpt-5-pro (high). 
Change-Id: Iab7ba7187703e275c4c0aa76779381dff4006316 Signed-off-by: Thomas Kosiewski --- src/browser/components/ChatInput/index.tsx | 1 + src/browser/utils/commands/sources.ts | 3 +- src/browser/utils/thinking/policy.test.ts | 56 ++++++++++++++++++++++ src/browser/utils/thinking/policy.ts | 11 ++++- src/common/constants/knownModels.ts | 7 +++ src/common/orpc/schemas/stream.ts | 2 +- src/common/types/thinking.ts | 7 ++- src/common/utils/ai/providerOptions.ts | 7 ++- src/common/utils/tokens/models-extra.ts | 18 +++++++ 9 files changed, 105 insertions(+), 7 deletions(-) diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx index 235bacd253..3bf43dd03e 100644 --- a/src/browser/components/ChatInput/index.tsx +++ b/src/browser/components/ChatInput/index.tsx @@ -515,6 +515,7 @@ export const ChatInput: React.FC = (props) => { low: "Low — adds light reasoning", medium: "Medium — balanced reasoning", high: "High — maximum reasoning depth", + xhigh: "Extra High — extended deep thinking", }; setToast({ diff --git a/src/browser/utils/commands/sources.ts b/src/browser/utils/commands/sources.ts index 819bb7a8ad..01791e58b6 100644 --- a/src/browser/utils/commands/sources.ts +++ b/src/browser/utils/commands/sources.ts @@ -50,7 +50,7 @@ export interface BuildSourcesParams { onOpenSettings?: (section?: string) => void; } -const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high"]; +const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high", "xhigh"]; /** * Command palette section names @@ -431,6 +431,7 @@ export function buildCoreSources(p: BuildSourcesParams): Array<() => CommandActi low: "Low — add a bit of reasoning", medium: "Medium — balanced reasoning", high: "High — maximum reasoning depth", + xhigh: "Extra High — extended deep thinking", }; const currentLevel = p.getThinkingLevel(workspaceId); diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts index 
86bc4a9030..3de1b78230 100644 --- a/src/browser/utils/thinking/policy.test.ts +++ b/src/browser/utils/thinking/policy.test.ts @@ -2,6 +2,36 @@ import { describe, expect, test } from "bun:test"; import { getThinkingPolicyForModel, enforceThinkingPolicy } from "./policy"; describe("getThinkingPolicyForModel", () => { + test("returns 5 levels including xhigh for gpt-5.1-codex-max", () => { + expect(getThinkingPolicyForModel("openai:gpt-5.1-codex-max")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + + test("returns 5 levels for gpt-5.1-codex-max with version suffix", () => { + expect(getThinkingPolicyForModel("openai:gpt-5.1-codex-max-2025-12-01")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + + test("returns 5 levels for gpt-5.1-codex-max with whitespace after colon", () => { + expect(getThinkingPolicyForModel("openai: gpt-5.1-codex-max")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + test("returns single HIGH for gpt-5-pro base model", () => { expect(getThinkingPolicyForModel("openai:gpt-5-pro")).toEqual(["high"]); }); @@ -111,6 +141,32 @@ describe("enforceThinkingPolicy", () => { expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off"); }); }); + + describe("GPT-5.1-Codex-Max (5 levels including xhigh)", () => { + test("allows all 5 levels including xhigh", () => { + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "off")).toBe("off"); + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "low")).toBe("low"); + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "medium")).toBe("medium"); + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "high")).toBe("high"); + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "xhigh")).toBe("xhigh"); + }); + + test("allows xhigh for versioned model", () => { + expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max-2025-12-01", "xhigh")).toBe("xhigh"); + }); + }); + + describe("xhigh 
fallback for non-codex-max models", () => { + test("falls back to medium when xhigh requested on standard model", () => { + // Standard models don't support xhigh, so fall back to medium (preferred fallback) + expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "xhigh")).toBe("medium"); + }); + + test("falls back to high when xhigh requested on gpt-5-pro", () => { + // gpt-5-pro only supports high, so xhigh falls back to high + expect(enforceThinkingPolicy("openai:gpt-5-pro", "xhigh")).toBe("high"); + }); + }); }); // Note: Tests for invalid levels removed - TypeScript type system prevents invalid diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts index 4346d9272d..9bb03eafde 100644 --- a/src/browser/utils/thinking/policy.ts +++ b/src/browser/utils/thinking/policy.ts @@ -24,14 +24,21 @@ export type ThinkingPolicy = readonly ThinkingLevel[]; * Returns the thinking policy for a given model. * * Rules: + * - openai:gpt-5.1-codex-max → ["off", "low", "medium", "high", "xhigh"] (5 levels including xhigh) * - openai:gpt-5-pro → ["high"] (only supported level) * - gemini-3 → ["low", "high"] (thinking level only) - * - default → ["off", "low", "medium", "high"] (all levels selectable) + * - default → ["off", "low", "medium", "high"] (standard 4 levels) * * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06). * Does NOT match gpt-5-pro-mini (uses negative lookahead). 
*/ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { + // GPT-5.1-Codex-Max supports 5 reasoning levels including xhigh (Extra High) + // Match "openai:" followed by optional whitespace and "gpt-5.1-codex-max" + if (/^openai:\s*gpt-5\.1-codex-max/.test(modelString)) { + return ["off", "low", "medium", "high", "xhigh"]; + } + // Match "openai:" followed by optional whitespace and "gpt-5-pro" // Allow version suffixes like "-2025-10-06" but NOT "-mini" or other text suffixes if (/^openai:\s*gpt-5-pro(?!-[a-z])/.test(modelString)) { @@ -43,7 +50,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { return ["low", "high"]; } - // Default policy: all levels selectable + // Default policy: standard 4 levels (xhigh only for codex-max) return ["off", "low", "medium", "high"]; } diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts index 77caa2adfd..e11cb7a9a0 100644 --- a/src/common/constants/knownModels.ts +++ b/src/common/constants/knownModels.ts @@ -70,6 +70,13 @@ const MODEL_DEFINITIONS = { providerModelId: "gpt-5.1-codex-mini", aliases: ["codex-mini"], }, + GPT_CODEX_MAX: { + provider: "openai", + providerModelId: "gpt-5.1-codex-max", + aliases: ["codex-max"], + warm: true, + tokenizerOverride: "openai/gpt-5", + }, GEMINI_3_PRO: { provider: "google", providerModelId: "gemini-3-pro-preview", diff --git a/src/common/orpc/schemas/stream.ts b/src/common/orpc/schemas/stream.ts index f8c8ff7550..9a2d6b092c 100644 --- a/src/common/orpc/schemas/stream.ts +++ b/src/common/orpc/schemas/stream.ts @@ -313,7 +313,7 @@ export const ToolPolicySchema = z.array(ToolPolicyFilterSchema).meta({ // SendMessage options export const SendMessageOptionsSchema = z.object({ editMessageId: z.string().optional(), - thinkingLevel: z.enum(["off", "low", "medium", "high"]).optional(), + thinkingLevel: z.enum(["off", "low", "medium", "high", "xhigh"]).optional(), model: z.string("No model specified"), 
toolPolicy: ToolPolicySchema.optional(), additionalSystemInstructions: z.string().optional(), diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts index f6283d067a..a30d258793 100644 --- a/src/common/types/thinking.ts +++ b/src/common/types/thinking.ts @@ -5,7 +5,7 @@ * different AI providers (Anthropic, OpenAI, etc.) */ -export type ThinkingLevel = "off" | "low" | "medium" | "high"; +export type ThinkingLevel = "off" | "low" | "medium" | "high" | "xhigh"; /** * Active thinking levels (excludes "off") @@ -30,6 +30,7 @@ export const ANTHROPIC_THINKING_BUDGETS: Record = { low: 4000, medium: 10000, high: 20000, + xhigh: 20000, // Same as high - Anthropic doesn't support xhigh }; /** @@ -47,6 +48,7 @@ export const ANTHROPIC_EFFORT: Record low: "low", medium: "medium", high: "high", + xhigh: "high", // Fallback to high - Anthropic doesn't support xhigh }; /** @@ -66,6 +68,7 @@ export const OPENAI_REASONING_EFFORT: Record low: "low", medium: "medium", high: "high", + xhigh: "xhigh", // Extra High - only supported by gpt-5.1-codex-max }; /** @@ -83,6 +86,7 @@ export const GEMINI_THINKING_BUDGETS: Record = { low: 2048, medium: 8192, high: 16384, // Conservative max (some models go to 32k) + xhigh: 16384, // Same as high - Gemini doesn't support xhigh } as const; export const OPENROUTER_REASONING_EFFORT: Record< ThinkingLevel, @@ -92,4 +96,5 @@ export const OPENROUTER_REASONING_EFFORT: Record< low: "low", medium: "medium", high: "high", + xhigh: "high", // Fallback to high - OpenRouter doesn't support xhigh }; diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 202c5b8e4f..b1d3b19037 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -254,8 +254,11 @@ export function buildProviderOptions( }; if (isGemini3) { - // Gemini 3 uses thinkingLevel (low/high) - thinkingConfig.thinkingLevel = effectiveThinking === "medium" ? 
"low" : effectiveThinking; + // Gemini 3 uses thinkingLevel (low/high) - map medium/xhigh to supported values + thinkingConfig.thinkingLevel = + effectiveThinking === "medium" || effectiveThinking === "xhigh" + ? "high" + : effectiveThinking; } else { // Gemini 2.5 uses thinkingBudget const budget = GEMINI_THINKING_BUDGETS[effectiveThinking]; diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index 6e496c6ced..d64ae43bc2 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -88,4 +88,22 @@ export const modelsExtra: Record = { supports_reasoning: true, supports_response_schema: true, }, + + // GPT-5.1-Codex-Max - Extended reasoning model with xhigh support + // Pricing TBD - using estimated values based on Codex pricing pattern + // Supports 5 reasoning levels: off, low, medium, high, xhigh + "gpt-5.1-codex-max": { + max_input_tokens: 400000, // Estimated based on compaction capability + max_output_tokens: 272000, // Same as gpt-5-pro + input_cost_per_token: 0.00002, // $20/M - placeholder estimate + output_cost_per_token: 0.00008, // $80/M - placeholder estimate + litellm_provider: "openai", + mode: "chat", + supports_function_calling: true, + supports_vision: true, + supports_reasoning: true, + supports_response_schema: true, + knowledge_cutoff: "2025-06-30", // Estimated + supported_endpoints: ["/v1/responses"], + }, }; From bc841f90b56fb5753c2979ef676173af9c8daf91 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Fri, 5 Dec 2025 19:28:20 +0100 Subject: [PATCH 2/7] fix: correct gpt-5.1-codex-max pricing to match codex Same pricing as gpt-5.1-codex: $1.25/M input, $10/M output Also aligned max token limits with the codex model. 
Change-Id: I030014df05a5ccae62f5c93d7435ec2363d23317 Signed-off-by: Thomas Kosiewski --- src/common/orpc/schemas/telemetry.ts | 2 +- src/common/telemetry/payload.ts | 2 +- src/common/utils/tokens/models-extra.ts | 11 +++++------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/common/orpc/schemas/telemetry.ts b/src/common/orpc/schemas/telemetry.ts index 6dec67c06c..f57156cf58 100644 --- a/src/common/orpc/schemas/telemetry.ts +++ b/src/common/orpc/schemas/telemetry.ts @@ -29,7 +29,7 @@ const FrontendPlatformInfoSchema = z.object({ }); // Thinking level enum (matches payload.ts TelemetryThinkingLevel) -const TelemetryThinkingLevelSchema = z.enum(["off", "low", "medium", "high"]); +const TelemetryThinkingLevelSchema = z.enum(["off", "low", "medium", "high", "xhigh"]); // Command type enum (matches payload.ts TelemetryCommandType) const TelemetryCommandTypeSchema = z.enum([ diff --git a/src/common/telemetry/payload.ts b/src/common/telemetry/payload.ts index 45c42415fc..d70aeff0af 100644 --- a/src/common/telemetry/payload.ts +++ b/src/common/telemetry/payload.ts @@ -86,7 +86,7 @@ export interface WorkspaceSwitchedPayload { /** * Thinking level for extended thinking feature */ -export type TelemetryThinkingLevel = "off" | "low" | "medium" | "high"; +export type TelemetryThinkingLevel = "off" | "low" | "medium" | "high" | "xhigh"; /** * Chat/AI interaction events diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index d64ae43bc2..62f753e1d2 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -90,20 +90,19 @@ export const modelsExtra: Record = { }, // GPT-5.1-Codex-Max - Extended reasoning model with xhigh support - // Pricing TBD - using estimated values based on Codex pricing pattern + // Same pricing as gpt-5.1-codex: $1.25/M input, $10/M output // Supports 5 reasoning levels: off, low, medium, high, xhigh "gpt-5.1-codex-max": { - max_input_tokens: 400000, // 
Estimated based on compaction capability - max_output_tokens: 272000, // Same as gpt-5-pro - input_cost_per_token: 0.00002, // $20/M - placeholder estimate - output_cost_per_token: 0.00008, // $80/M - placeholder estimate + max_input_tokens: 272000, // Same as gpt-5.1-codex + max_output_tokens: 128000, // Same as gpt-5.1-codex + input_cost_per_token: 0.00000125, // $1.25 per million input tokens + output_cost_per_token: 0.00001, // $10 per million output tokens litellm_provider: "openai", mode: "chat", supports_function_calling: true, supports_vision: true, supports_reasoning: true, supports_response_schema: true, - knowledge_cutoff: "2025-06-30", // Estimated supported_endpoints: ["/v1/responses"], }, }; From c86ae7ba5d770b4af87fa2d435e7e4238d7cb7e7 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 6 Dec 2025 14:23:18 -0600 Subject: [PATCH 3/7] fix: style xhigh thinking slider glow --- src/browser/components/ThinkingSlider.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/browser/components/ThinkingSlider.tsx b/src/browser/components/ThinkingSlider.tsx index ad2f4b50cd..225af480dd 100644 --- a/src/browser/components/ThinkingSlider.tsx +++ b/src/browser/components/ThinkingSlider.tsx @@ -126,7 +126,10 @@ export const ThinkingSliderComponent: React.FC = ({ modelS // For styling, we still want to map to the "global" intensity 0-3 // to keep colors consistent (e.g. "high" is always purple, even if it's step 1 of 2) const globalLevelIndex = ["off", "low", "medium", "high"].indexOf(thinkingLevel); - const visualValue = globalLevelIndex === -1 ? 0 : globalLevelIndex; + const visualValue = (() => { + if (thinkingLevel === "xhigh") return 3; // reuse HIGH styling/glow for XHIGH + return globalLevelIndex === -1 ? 
0 : globalLevelIndex; + })(); const sliderStyles = getSliderStyles(visualValue, isHovering); const textStyle = getTextStyle(visualValue); From 12041fd13f66b03f013cc5dfb1f767821a152ec6 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 6 Dec 2025 14:24:03 -0600 Subject: [PATCH 4/7] refactor: clamp thinking slider visuals for extra levels --- src/browser/components/ThinkingSlider.tsx | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/browser/components/ThinkingSlider.tsx b/src/browser/components/ThinkingSlider.tsx index 225af480dd..d05124c097 100644 --- a/src/browser/components/ThinkingSlider.tsx +++ b/src/browser/components/ThinkingSlider.tsx @@ -123,12 +123,13 @@ export const ThinkingSliderComponent: React.FC = ({ modelS const sliderValue = currentIndex === -1 ? 0 : currentIndex; const maxSteps = allowed.length - 1; - // For styling, we still want to map to the "global" intensity 0-3 - // to keep colors consistent (e.g. "high" is always purple, even if it's step 1 of 2) - const globalLevelIndex = ["off", "low", "medium", "high"].indexOf(thinkingLevel); + // Map levels to visual intensity indices (0-3) so colors/glow stay consistent + // Levels outside the base 4 (e.g., xhigh) map to the strongest intensity + const baseVisualOrder: ThinkingLevel[] = ["off", "low", "medium", "high"]; const visualValue = (() => { - if (thinkingLevel === "xhigh") return 3; // reuse HIGH styling/glow for XHIGH - return globalLevelIndex === -1 ? 
0 : globalLevelIndex; + const idx = baseVisualOrder.indexOf(thinkingLevel); + if (idx >= 0) return idx; + return baseVisualOrder.length - 1; // clamp extras (e.g., xhigh) to strongest glow })(); const sliderStyles = getSliderStyles(visualValue, isHovering); From e0bfa20b629d75901d0e05e440990286e56d9403 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 6 Dec 2025 14:29:35 -0600 Subject: [PATCH 5/7] chore: refresh models and normalize thinking policy --- src/browser/utils/thinking/policy.test.ts | 20 + src/browser/utils/thinking/policy.ts | 17 +- src/common/utils/tokens/models.json | 9960 ++++++++++++++------- 3 files changed, 6623 insertions(+), 3374 deletions(-) diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts index 3de1b78230..6ec91aeeda 100644 --- a/src/browser/utils/thinking/policy.test.ts +++ b/src/browser/utils/thinking/policy.test.ts @@ -22,6 +22,26 @@ describe("getThinkingPolicyForModel", () => { ]); }); + test("returns 5 levels for bare gpt-5.1-codex-max without prefix", () => { + expect(getThinkingPolicyForModel("gpt-5.1-codex-max")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + + test("returns 5 levels for codex-max alias", () => { + expect(getThinkingPolicyForModel("codex-max")).toEqual([ + "off", + "low", + "medium", + "high", + "xhigh", + ]); + }); + test("returns 5 levels for gpt-5.1-codex-max with whitespace after colon", () => { expect(getThinkingPolicyForModel("openai: gpt-5.1-codex-max")).toEqual([ "off", diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts index 9bb03eafde..c81b420237 100644 --- a/src/browser/utils/thinking/policy.ts +++ b/src/browser/utils/thinking/policy.ts @@ -33,20 +33,25 @@ export type ThinkingPolicy = readonly ThinkingLevel[]; * Does NOT match gpt-5-pro-mini (uses negative lookahead). 
*/ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { + // Normalize to be robust to provider prefixes, whitespace, and version suffixes + const normalized = modelString.trim().toLowerCase(); + const withoutPrefix = normalized.replace(/^[a-z0-9_-]+:\s*/, ""); + // GPT-5.1-Codex-Max supports 5 reasoning levels including xhigh (Extra High) - // Match "openai:" followed by optional whitespace and "gpt-5.1-codex-max" - if (/^openai:\s*gpt-5\.1-codex-max/.test(modelString)) { + if ( + withoutPrefix.startsWith("gpt-5.1-codex-max") || + withoutPrefix.startsWith("codex-max") + ) { return ["off", "low", "medium", "high", "xhigh"]; } - // Match "openai:" followed by optional whitespace and "gpt-5-pro" - // Allow version suffixes like "-2025-10-06" but NOT "-mini" or other text suffixes - if (/^openai:\s*gpt-5-pro(?!-[a-z])/.test(modelString)) { + // gpt-5-pro (not mini) with optional version suffix + if (/^gpt-5-pro(?!-[a-z])/.test(withoutPrefix)) { return ["high"]; } // Gemini 3 Pro only supports "low" and "high" reasoning levels - if (modelString.includes("gemini-3")) { + if (withoutPrefix.includes("gemini-3")) { return ["low", "high"]; } diff --git a/src/common/utils/tokens/models.json b/src/common/utils/tokens/models.json index 855e0ae542..b2d757ddf0 100644 --- a/src/common/utils/tokens/models.json +++ b/src/common/utils/tokens/models.json @@ -1,26 +1,32 @@ { "sample_spec": { - "code_interpreter_cost_per_session": 0.0, - "computer_use_input_cost_per_1k_tokens": 0.0, - "computer_use_output_cost_per_1k_tokens": 0.0, + "code_interpreter_cost_per_session": 0, + "computer_use_input_cost_per_1k_tokens": 0, + "computer_use_output_cost_per_1k_tokens": 0, "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD", - "file_search_cost_per_1k_calls": 0.0, - "file_search_cost_per_gb_per_day": 0.0, - "input_cost_per_audio_token": 0.0, - "input_cost_per_token": 0.0, + "file_search_cost_per_1k_calls": 0, + 
"file_search_cost_per_gb_per_day": 0, + "input_cost_per_audio_token": 0, + "input_cost_per_token": 0, "litellm_provider": "one of https://docs.litellm.ai/docs/providers", "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens", "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens", "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.", "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank, search", - "output_cost_per_reasoning_token": 0.0, - "output_cost_per_token": 0.0, + "output_cost_per_reasoning_token": 0, + "output_cost_per_token": 0, "search_context_cost_per_query": { - "search_context_size_high": 0.0, - "search_context_size_low": 0.0, - "search_context_size_medium": 0.0 + "search_context_size_high": 0, + "search_context_size_low": 0, + "search_context_size_medium": 0 }, - "supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], + "supported_regions": [ + "global", + "us-west-2", + "eu-west-1", + "ap-southeast-1", + "ap-northeast-1" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -31,7 +37,7 @@ "supports_system_messages": true, "supports_vision": true, "supports_web_search": true, - "vector_store_cost_per_gb_per_day": 0.0 + "vector_store_cost_per_gb_per_day": 0 }, "1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": { "litellm_provider": "bedrock", @@ -50,7 +56,7 @@ "input_cost_per_pixel": 1.9e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { "litellm_provider": "bedrock", @@ -63,7 +69,7 @@ "input_cost_per_pixel": 2.4414e-7, "litellm_provider": "openai", "mode": 
"image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { "litellm_provider": "bedrock", @@ -76,7 +82,7 @@ "input_cost_per_pixel": 6.86e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "512-x-512/max-steps/stability.stable-diffusion-xl-v0": { "litellm_provider": "bedrock", @@ -86,31 +92,31 @@ "output_cost_per_image": 0.036 }, "ai21.j2-mid-v1": { - "input_cost_per_token": 1.25e-5, + "input_cost_per_token": 0.0000125, "litellm_provider": "bedrock", "max_input_tokens": 8191, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 1.25e-5 + "output_cost_per_token": 0.0000125 }, "ai21.j2-ultra-v1": { - "input_cost_per_token": 1.88e-5, + "input_cost_per_token": 0.0000188, "litellm_provider": "bedrock", "max_input_tokens": 8191, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 1.88e-5 + "output_cost_per_token": 0.0000188 }, "ai21.jamba-1-5-large-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6 + "output_cost_per_token": 0.000008 }, "ai21.jamba-1-5-mini-v1:0": { "input_cost_per_token": 2e-7, @@ -139,7 +145,9 @@ "mode": "image_generation", "output_cost_per_image": 0.021, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/dall-e-3": { "litellm_provider": "aiml", @@ -149,7 +157,9 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro": { "litellm_provider": "aiml", @@ -159,19 +169,25 @@ 
"mode": "image_generation", "output_cost_per_image": 0.053, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro/v1.1": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.042, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro/v1.1-ultra": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.063, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-realism": { "litellm_provider": "aiml", @@ -181,7 +197,9 @@ "mode": "image_generation", "output_cost_per_image": 0.037, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/dev": { "litellm_provider": "aiml", @@ -191,7 +209,9 @@ "mode": "image_generation", "output_cost_per_image": 0.026, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/kontext-max/text-to-image": { "litellm_provider": "aiml", @@ -201,7 +221,9 @@ "mode": "image_generation", "output_cost_per_image": 0.084, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/kontext-pro/text-to-image": { "litellm_provider": "aiml", @@ -211,7 +233,9 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/schnell": { "litellm_provider": "aiml", @@ -221,7 +245,15 @@ "mode": "image_generation", "output_cost_per_image": 0.003, "source": 
"https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "amazon.nova-canvas-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 2600, + "mode": "image_generation", + "output_cost_per_image": 0.06 }, "amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -237,6 +269,70 @@ "supports_response_schema": true, "supports_vision": true }, + "amazon.nova-2-lite-v1:0": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "apac.amazon.nova-2-lite-v1:0": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.00000275, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "eu.amazon.nova-2-lite-v1:0": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.00000275, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "us.amazon.nova-2-lite-v1:0": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.00000275, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "amazon.nova-micro-v1:0": { "input_cost_per_token": 3.5e-8, "litellm_provider": "bedrock_converse", @@ -256,7 +352,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.2e-6, + "output_cost_per_token": 0.0000032, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -265,7 +361,7 @@ }, "amazon.rerank-v1:0": { "input_cost_per_query": 0.001, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "bedrock", "max_document_chunks_per_query": 100, "max_input_tokens": 32000, @@ -274,10 +370,10 @@ "max_tokens": 32000, "max_tokens_per_document_chunk": 512, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "amazon.titan-embed-image-v1": { - "input_cost_per_image": 6e-5, + "input_cost_per_image": 0.00006, "input_cost_per_token": 8e-7, "litellm_provider": "bedrock", "max_input_tokens": 128, @@ -286,7 +382,7 @@ "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." 
}, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1", "supports_embedding_image_input": true, @@ -298,7 +394,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536 }, "amazon.titan-embed-text-v2:0": { @@ -307,11 +403,11 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024 }, "amazon.titan-image-generator-v1": { - "input_cost_per_image": 0.0, + "input_cost_per_image": 0, "output_cost_per_image": 0.008, "output_cost_per_image_premium_image": 0.01, "output_cost_per_image_above_512_and_512_pixels": 0.01, @@ -320,7 +416,16 @@ "mode": "image_generation" }, "amazon.titan-image-generator-v2": { - "input_cost_per_image": 0.0, + "input_cost_per_image": 0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "amazon.titan-image-generator-v2:0": { + "input_cost_per_image": 0, "output_cost_per_image": 0.008, "output_cost_per_image_premium_image": 0.01, "output_cost_per_image_above_1024_and_1024_pixels": 0.01, @@ -329,18 +434,18 @@ "mode": "image_generation" }, "twelvelabs.marengo-embed-2-7-v1:0": { - "input_cost_per_token": 7e-5, + "input_cost_per_token": 0.00007, "litellm_provider": "bedrock", "max_input_tokens": 77, "max_tokens": 77, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "supports_embedding_image_input": true, "supports_image_input": true }, 
"us.twelvelabs.marengo-embed-2-7-v1:0": { - "input_cost_per_token": 7e-5, + "input_cost_per_token": 0.00007, "input_cost_per_video_per_second": 0.0007, "input_cost_per_audio_per_second": 0.00014, "input_cost_per_image": 0.0001, @@ -348,13 +453,13 @@ "max_input_tokens": 77, "max_tokens": 77, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "supports_embedding_image_input": true, "supports_image_input": true }, "eu.twelvelabs.marengo-embed-2-7-v1:0": { - "input_cost_per_token": 7e-5, + "input_cost_per_token": 0.00007, "input_cost_per_video_per_second": 0.0007, "input_cost_per_audio_per_second": 0.00014, "input_cost_per_image": 0.0001, @@ -362,40 +467,40 @@ "max_input_tokens": 77, "max_tokens": 77, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "supports_embedding_image_input": true, "supports_image_input": true }, "twelvelabs.pegasus-1-2-v1:0": { "input_cost_per_video_per_second": 0.00049, - "output_cost_per_token": 7.5e-6, + "output_cost_per_token": 0.0000075, "litellm_provider": "bedrock", "mode": "chat", "supports_video_input": true }, "us.twelvelabs.pegasus-1-2-v1:0": { "input_cost_per_video_per_second": 0.00049, - "output_cost_per_token": 7.5e-6, + "output_cost_per_token": 0.0000075, "litellm_provider": "bedrock", "mode": "chat", "supports_video_input": true }, "eu.twelvelabs.pegasus-1-2-v1:0": { "input_cost_per_video_per_second": 0.00049, - "output_cost_per_token": 7.5e-6, + "output_cost_per_token": 0.0000075, "litellm_provider": "bedrock", "mode": "chat", "supports_video_input": true }, "amazon.titan-text-express-v1": { - "input_cost_per_token": 1.3e-6, + "input_cost_per_token": 0.0000013, "litellm_provider": "bedrock", "max_input_tokens": 42000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1.7e-6 + "output_cost_per_token": 0.0000017 }, "amazon.titan-text-lite-v1": { "input_cost_per_token": 
3e-7, @@ -413,10 +518,10 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "anthropic.claude-3-5-haiku-20241022-v1:0": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 8e-7, "litellm_provider": "bedrock", @@ -424,7 +529,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -433,15 +538,15 @@ "supports_tool_choice": true }, "anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost": 0.00000125, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -455,15 +560,15 @@ "tool_use_system_prompt_tokens": 346 }, "anthropic.claude-haiku-4-5@20251001": { - "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost": 0.00000125, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, 
"supports_computer_use": true, @@ -477,13 +582,13 @@ "tool_use_system_prompt_tokens": 346 }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -491,15 +596,15 @@ "supports_vision": true }, "anthropic.claude-3-5-sonnet-20241022-v2:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -510,15 +615,15 @@ "supports_vision": true }, "anthropic.claude-3-7-sonnet-20240620-v1:0": { - "cache_creation_input_token_cost": 4.5e-6, + "cache_creation_input_token_cost": 0.0000045, "cache_read_input_token_cost": 3.6e-7, - "input_cost_per_token": 3.6e-6, + "input_cost_per_token": 0.0000036, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.8e-5, + "output_cost_per_token": 0.000018, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -530,15 +635,15 @@ "supports_vision": true }, "anthropic.claude-3-7-sonnet-20250219-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, 
"litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -556,7 +661,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -564,26 +669,26 @@ "supports_vision": true }, "anthropic.claude-3-opus-20240229-v1:0": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true }, "anthropic.claude-3-sonnet-20240229-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -597,19 +702,19 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, "supports_tool_choice": true }, "anthropic.claude-opus-4-1-20250805-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, 
"max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -627,15 +732,41 @@ "tool_use_system_prompt_tokens": 159 }, "anthropic.claude-opus-4-20250514-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -653,19 +784,19 @@ "tool_use_system_prompt_tokens": 159 }, "anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - 
"input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -683,19 +814,19 @@ "tool_use_system_prompt_tokens": 159 }, "anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -713,22 +844,22 @@ "tool_use_system_prompt_tokens": 159 }, "anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, 
"max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5 + "output_cost_per_token": 0.000024 }, "anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "anyscale/HuggingFaceH4/zephyr-7b-beta": { @@ -741,22 +872,22 @@ "output_cost_per_token": 1.5e-7 }, "anyscale/codellama/CodeLlama-34b-Instruct-hf": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anyscale", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "anyscale/codellama/CodeLlama-70b-Instruct-hf": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anyscale", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/codellama-CodeLlama-70b-Instruct-hf" }, "anyscale/google/gemma-7b-it": { @@ -779,13 +910,13 @@ "output_cost_per_token": 2.5e-7 }, "anyscale/meta-llama/Llama-2-70b-chat-hf": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anyscale", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "anyscale/meta-llama/Llama-2-7b-chat-hf": { "input_cost_per_token": 1.5e-7, @@ -797,13 +928,13 @@ "output_cost_per_token": 1.5e-7 }, "anyscale/meta-llama/Meta-Llama-3-70B-Instruct": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anyscale", 
"max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-70B-Instruct" }, "anyscale/meta-llama/Meta-Llama-3-8B-Instruct": { @@ -882,7 +1013,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.36e-6, + "output_cost_per_token": 0.00000336, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -890,13 +1021,13 @@ "supports_vision": true }, "apac.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -904,15 +1035,15 @@ "supports_vision": true }, "apac.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -929,7 +1060,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -937,15 +1068,15 @@ "supports_vision": true }, 
"apac.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -959,13 +1090,13 @@ "tool_use_system_prompt_tokens": 346 }, "apac.anthropic.claude-3-sonnet-20240229-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -973,19 +1104,19 @@ "supports_vision": true }, "apac.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + 
"output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -1003,31 +1134,31 @@ "tool_use_system_prompt_tokens": 159 }, "assemblyai/best": { - "input_cost_per_second": 3.333e-5, + "input_cost_per_second": 0.00003333, "litellm_provider": "assemblyai", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "assemblyai/nano": { "input_cost_per_second": 0.00010278, "litellm_provider": "assemblyai", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "au.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 4.125e-6, + "cache_creation_input_token_cost": 0.000004125, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 3.3e-6, - "input_cost_per_token_above_200k_tokens": 6.6e-6, - "output_cost_per_token_above_200k_tokens": 2.475e-5, - "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -1050,20 +1181,27 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/codex-mini": { "cache_read_input_token_cost": 3.75e-7, - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": 
"responses", - "output_cost_per_token": 6e-6, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000006, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -1075,26 +1213,87 @@ "supports_vision": true }, "azure/command-r-plus": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true }, + "azure/claude-haiku-4-5": { + "input_cost_per_token": 0.000001, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/claude-opus-4-1": { + "input_cost_per_token": 0.000015, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000075, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/claude-sonnet-4-5": { + "input_cost_per_token": 0.000003, + 
"litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "azure/computer-use-preview": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 1.2e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000012, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -1111,14 +1310,14 @@ }, "azure/eu/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", - "cache_read_input_token_cost": 1.375e-6, - "input_cost_per_token": 2.75e-6, + "cache_read_input_token_cost": 0.000001375, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1128,14 +1327,14 @@ }, "azure/eu/gpt-4o-2024-11-20": { "deprecation_date": "2026-03-01", - "cache_creation_input_token_cost": 1.38e-6, - "input_cost_per_token": 2.75e-6, + "cache_creation_input_token_cost": 0.00000138, + "input_cost_per_token": 0.00000275, 
"litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_response_schema": true, @@ -1161,15 +1360,15 @@ "azure/eu/gpt-4o-mini-realtime-preview-2024-12-17": { "cache_creation_input_audio_token_cost": 3.3e-7, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_audio_token": 1.1e-5, + "input_cost_per_audio_token": 0.000011, "input_cost_per_token": 6.6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2.2e-5, - "output_cost_per_token": 2.64e-6, + "output_cost_per_audio_token": 0.000022, + "output_cost_per_token": 0.00000264, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1178,17 +1377,17 @@ "supports_tool_choice": true }, "azure/eu/gpt-4o-realtime-preview-2024-10-01": { - "cache_creation_input_audio_token_cost": 2.2e-5, - "cache_read_input_token_cost": 2.75e-6, + "cache_creation_input_audio_token_cost": 0.000022, + "cache_read_input_token_cost": 0.00000275, "input_cost_per_audio_token": 0.00011, - "input_cost_per_token": 5.5e-6, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", "output_cost_per_audio_token": 0.00022, - "output_cost_per_token": 2.2e-5, + "output_cost_per_token": 0.000022, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1197,19 +1396,25 @@ "supports_tool_choice": true }, "azure/eu/gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_audio_token_cost": 2.5e-6, - "cache_read_input_token_cost": 2.75e-6, - "input_cost_per_audio_token": 4.4e-5, - "input_cost_per_token": 5.5e-6, + 
"cache_read_input_audio_token_cost": 0.0000025, + "cache_read_input_token_cost": 0.00000275, + "input_cost_per_audio_token": 0.000044, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2.2e-5, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.000022, + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1219,16 +1424,25 @@ }, "azure/eu/gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.375e-7, - "input_cost_per_token": 1.375e-6, + "input_cost_per_token": 0.000001375, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000011, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1248,10 +1462,19 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.2e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.0000022, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + 
"/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1265,16 +1488,26 @@ }, "azure/eu/gpt-5.1": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.000011, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1288,16 +1521,26 @@ }, "azure/eu/gpt-5.1-chat": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.000011, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1311,16 +1554,23 @@ }, 
"azure/eu/gpt-5.1-codex": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1.1e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000011, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1340,10 +1590,17 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2.2e-6, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.0000022, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1364,9 +1621,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4.4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1379,14 +1645,14 @@ "supports_vision": true }, "azure/eu/o1-2024-12-17": { - "cache_read_input_token_cost": 8.25e-6, - 
"input_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 0.00000825, + "input_cost_per_token": 0.0000165, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6.6e-5, + "output_cost_per_token": 0.000066, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1395,29 +1661,29 @@ }, "azure/eu/o1-mini-2024-09-12": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "input_cost_per_token_batches": 6.05e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.84e-6, - "output_cost_per_token_batches": 2.42e-6, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, "supports_vision": false }, "azure/eu/o1-preview-2024-09-12": { - "cache_read_input_token_cost": 8.25e-6, - "input_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 0.00000825, + "input_cost_per_token": 0.0000165, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6.6e-5, + "output_cost_per_token": 0.000066, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1425,30 +1691,30 @@ }, "azure/eu/o3-mini-2025-01-31": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "input_cost_per_token_batches": 6.05e-7, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.84e-6, - "output_cost_per_token_batches": 2.42e-6, + 
"output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": false }, "azure/global-standard/gpt-4o-2024-08-06": { - "cache_read_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, "deprecation_date": "2026-02-27", - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1457,15 +1723,15 @@ "supports_vision": true }, "azure/global-standard/gpt-4o-2024-11-20": { - "cache_read_input_token_cost": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, "deprecation_date": "2026-03-01", - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_response_schema": true, @@ -1488,14 +1754,14 @@ }, "azure/global/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1505,14 +1771,14 @@ }, "azure/global/gpt-4o-2024-11-20": { "deprecation_date": "2026-03-01", - 
"cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -1522,16 +1788,26 @@ }, "azure/global/gpt-5.1": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1545,16 +1821,26 @@ }, "azure/global/gpt-5.1-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], 
+ "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1568,16 +1854,23 @@ }, "azure/global/gpt-5.1-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1597,10 +1890,17 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2e-6, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1619,7 +1919,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_tool_choice": true }, @@ -1631,18 +1931,18 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true 
}, "azure/gpt-3.5-turbo-instruct-0914": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure_text", "max_input_tokens": 4097, "max_tokens": 4097, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "azure/gpt-35-turbo": { "input_cost_per_token": 5e-7, @@ -1651,7 +1951,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_tool_choice": true }, @@ -1663,7 +1963,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -1676,122 +1976,122 @@ "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-35-turbo-0613": { "deprecation_date": "2025-02-13", - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure", "max_input_tokens": 4097, "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-35-turbo-1106": { "deprecation_date": "2025-03-31", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "azure", "max_input_tokens": 16384, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-35-turbo-16k": { - 
"input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_tool_choice": true }, "azure/gpt-35-turbo-16k-0613": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_function_calling": true, "supports_tool_choice": true }, "azure/gpt-35-turbo-instruct": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure_text", "max_input_tokens": 4097, "max_tokens": 4097, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "azure/gpt-35-turbo-instruct-0914": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "azure_text", "max_input_tokens": 4097, "max_tokens": 4097, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "azure/gpt-4": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "azure", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-0125-preview": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-0613": { - "input_cost_per_token": 3e-5, + 
"input_cost_per_token": 0.00003, "litellm_provider": "azure", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-1106-preview": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-32k": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "azure", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -1801,7 +2101,7 @@ "supports_tool_choice": true }, "azure/gpt-4-32k-0613": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "azure", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -1811,55 +2111,64 @@ "supports_tool_choice": true }, "azure/gpt-4-turbo": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "azure/gpt-4-turbo-2024-04-09": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-4-turbo-vision-preview": { - 
"input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-4.1": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "azure", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1873,18 +2182,27 @@ "azure/gpt-4.1-2025-04-14": { "deprecation_date": "2026-11-04", "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "azure", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, 
+ "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1904,11 +2222,20 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1929,11 +2256,20 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1955,9 +2291,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + 
"/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1979,9 +2324,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1992,16 +2346,16 @@ "supports_vision": true }, "azure/gpt-4.5-preview": { - "cache_read_input_token_cost": 3.75e-5, - "input_cost_per_token": 7.5e-5, - "input_cost_per_token_batches": 3.75e-5, + "cache_read_input_token_cost": 0.0000375, + "input_cost_per_token": 0.000075, + "input_cost_per_token_batches": 0.0000375, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00015, - "output_cost_per_token_batches": 7.5e-5, + "output_cost_per_token_batches": 0.000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2011,14 +2365,14 @@ "supports_vision": true }, "azure/gpt-4o": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, 
"supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2027,13 +2381,13 @@ "supports_vision": true }, "azure/gpt-4o-2024-05-13": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2042,14 +2396,14 @@ }, "azure/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2059,14 +2413,14 @@ }, "azure/gpt-4o-2024-11-20": { "deprecation_date": "2026-03-01", - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.75e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2075,18 +2429,26 @@ "supports_vision": true }, "azure/gpt-audio-2025-08-28": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - 
"output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2098,18 +2460,26 @@ "supports_vision": false }, "azure/gpt-audio-mini-2025-10-06": { - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2121,18 +2491,26 @@ "supports_vision": false }, "azure/gpt-4o-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions"], - 
"supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2176,18 +2554,26 @@ "supports_vision": true }, "azure/gpt-4o-mini-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2201,15 +2587,15 @@ "azure/gpt-4o-mini-realtime-preview-2024-12-17": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 3e-7, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supports_audio_input": true, 
"supports_audio_output": true, "supports_function_calling": true, @@ -2218,21 +2604,30 @@ "supports_tool_choice": true }, "azure/gpt-realtime-2025-08-28": { - "cache_creation_input_audio_token_cost": 4e-6, - "cache_read_input_token_cost": 4e-6, - "input_cost_per_audio_token": 3.2e-5, - "input_cost_per_image": 5e-6, - "input_cost_per_token": 4e-6, + "cache_creation_input_audio_token_cost": 0.000004, + "cache_read_input_token_cost": 0.000004, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, "litellm_provider": "azure", "max_input_tokens": 32000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 6.4e-5, - "output_cost_per_token": 1.6e-5, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.000016, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2243,7 +2638,7 @@ "azure/gpt-realtime-mini-2025-10-06": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 6e-8, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_image": 8e-7, "input_cost_per_token": 6e-7, "litellm_provider": "azure", @@ -2251,11 +2646,20 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/realtime" 
+ ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2264,38 +2668,47 @@ "supports_tool_choice": true }, "azure/gpt-4o-mini-transcribe": { - "input_cost_per_audio_token": 3e-6, - "input_cost_per_token": 1.25e-6, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 5e-6, - "supported_endpoints": ["/v1/audio/transcriptions"] + "output_cost_per_token": 0.000005, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "azure/gpt-4o-mini-tts": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "mode": "audio_speech", - "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/audio/speech"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["audio"] + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] }, "azure/gpt-4o-realtime-preview-2024-10-01": { - "cache_creation_input_audio_token_cost": 2e-5, - "cache_read_input_token_cost": 2.5e-6, + "cache_creation_input_audio_token_cost": 0.00002, + "cache_read_input_token_cost": 0.0000025, "input_cost_per_audio_token": 0.0001, - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", "output_cost_per_audio_token": 0.0002, - "output_cost_per_token": 2e-5, + "output_cost_per_token": 0.00002, "supports_audio_input": true, 
"supports_audio_output": true, "supports_function_calling": true, @@ -2304,18 +2717,24 @@ "supports_tool_choice": true }, "azure/gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.000005, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2e-5, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00002, + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2324,40 +2743,53 @@ "supports_tool_choice": true }, "azure/gpt-4o-transcribe": { - "input_cost_per_audio_token": 6e-6, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.000006, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/audio/transcriptions"] + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "azure/gpt-4o-transcribe-diarize": { - "input_cost_per_audio_token": 6e-6, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.000006, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/audio/transcriptions"] + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, 
"azure/gpt-5.1-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2373,18 +2805,27 @@ "azure/gpt-5.1-chat-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": 
false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -2399,18 +2840,25 @@ "azure/gpt-5.1-codex-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2432,11 +2880,18 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2e-6, - "output_cost_per_token_priority": 3.6e-6, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "output_cost_per_token_priority": 0.0000036, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2450,16 +2905,25 @@ }, "azure/gpt-5": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, 
"mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2473,16 +2937,25 @@ }, "azure/gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2496,17 +2969,26 @@ }, "azure/gpt-5-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", 
"image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2520,16 +3002,25 @@ }, "azure/gpt-5-chat-latest": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2543,16 +3034,23 @@ }, "azure/gpt-5-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2572,10 +3070,19 @@ "max_output_tokens": 
128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2595,10 +3102,19 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2619,9 +3135,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2642,9 +3167,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": 
["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2657,7 +3191,7 @@ "supports_vision": true }, "azure/gpt-5-pro": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, @@ -2665,9 +3199,16 @@ "mode": "responses", "output_cost_per_token": 0.00012, "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5", - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -2680,16 +3221,26 @@ }, "azure/gpt-5.1": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", 
+ "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2703,16 +3254,26 @@ }, "azure/gpt-5.1-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2726,16 +3287,53 @@ }, "azure/gpt-5.1-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"azure/gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "azure", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2755,10 +3353,17 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2e-6, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2774,187 +3379,227 @@ "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/hd/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 6.539e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/hd/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 6.539e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, 
"azure/high/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.59263611e-7, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", - 
"output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/gpt-image-1-mini": { "input_cost_per_pixel": 8.0566406e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0345052083e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + 
"supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 7.9752604167e-9, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.1575520833e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/mistral-large-2402": { 
- "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "azure", "max_input_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "azure/mistral-large-latest": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "azure", "max_input_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "azure/o1": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2963,14 +3608,14 @@ "supports_vision": true }, "azure/o1-2024-12-17": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2980,13 +3625,13 @@ }, "azure/o1-mini": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.84e-6, + "output_cost_per_token": 0.00000484, "supports_function_calling": true, 
"supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -2995,13 +3640,13 @@ }, "azure/o1-mini-2024-09-12": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3009,14 +3654,14 @@ "supports_vision": false }, "azure/o1-preview": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3024,14 +3669,14 @@ "supports_vision": false }, "azure/o1-preview-2024-09-12": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -3041,16 +3686,25 @@ }, "azure/o3": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", 
"/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000008, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3062,16 +3716,25 @@ "azure/o3-2025-04-16": { "deprecation_date": "2026-04-16", "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000008, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3081,17 +3744,26 @@ "supports_vision": true }, "azure/o3-deep-research": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_token": 1e-5, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_token": 0.00001, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 4e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00004, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + 
"supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -3105,13 +3777,13 @@ }, "azure/o3-mini": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, @@ -3120,31 +3792,40 @@ }, "azure/o3-mini-2025-01-31": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": false }, "azure/o3-pro": { - "input_cost_per_token": 2e-5, - "input_cost_per_token_batches": 1e-5, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-5, - "output_cost_per_token_batches": 4e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00008, + "output_cost_per_token_batches": 0.00004, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, 
"supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -3154,18 +3835,27 @@ "supports_vision": true }, "azure/o3-pro-2025-06-10": { - "input_cost_per_token": 2e-5, - "input_cost_per_token_batches": 1e-5, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-5, - "output_cost_per_token_batches": 4e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00008, + "output_cost_per_token_batches": 0.00004, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -3176,16 +3866,25 @@ }, "azure/o4-mini": { "cache_read_input_token_cost": 2.75e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.0000044, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3196,13 +3895,13 @@ }, "azure/o4-mini-2025-04-16": { "cache_read_input_token_cost": 2.75e-7, - 
"input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3212,28 +3911,28 @@ "supports_vision": true }, "azure/standard/1024-x-1024/dall-e-2": { - "input_cost_per_pixel": 0.0, + "input_cost_per_pixel": 0, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/standard/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 3.81469e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/standard/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 4.359e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/standard/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 4.359e-8, "litellm_provider": "azure", "mode": "image_generation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/text-embedding-3-large": { "input_cost_per_token": 1.3e-7, @@ -3241,7 +3940,7 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/text-embedding-3-small": { "deprecation_date": "2026-04-30", @@ -3250,7 +3949,7 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/text-embedding-ada-002": { "input_cost_per_token": 1e-7, @@ -3258,45 +3957,54 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure/speech/azure-tts": { - "input_cost_per_character": 15e-6, + "input_cost_per_character": 0.000015, 
"litellm_provider": "azure", "mode": "audio_speech", "source": "https://azure.microsoft.com/en-us/pricing/calculator/" }, "azure/speech/azure-tts-hd": { - "input_cost_per_character": 30e-6, + "input_cost_per_character": 0.00003, "litellm_provider": "azure", "mode": "audio_speech", "source": "https://azure.microsoft.com/en-us/pricing/calculator/" }, "azure/tts-1": { - "input_cost_per_character": 1.5e-5, + "input_cost_per_character": 0.000015, "litellm_provider": "azure", "mode": "audio_speech" }, "azure/tts-1-hd": { - "input_cost_per_character": 3e-5, + "input_cost_per_character": 0.00003, "litellm_provider": "azure", "mode": "audio_speech" }, "azure/us/gpt-4.1-2025-04-14": { "deprecation_date": "2026-11-04", "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 2.2e-6, - "input_cost_per_token_batches": 1.1e-6, + "input_cost_per_token": 0.0000022, + "input_cost_per_token_batches": 0.0000011, "litellm_provider": "azure", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8.8e-6, - "output_cost_per_token_batches": 4.4e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.0000088, + "output_cost_per_token_batches": 0.0000044, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3317,11 +4025,20 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.76e-6, + "output_cost_per_token": 0.00000176, "output_cost_per_token_batches": 8.8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", 
"image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3344,9 +4061,18 @@ "mode": "chat", "output_cost_per_token": 4.4e-7, "output_cost_per_token_batches": 2.2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3358,14 +4084,14 @@ }, "azure/us/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", - "cache_read_input_token_cost": 1.375e-6, - "input_cost_per_token": 2.75e-6, + "cache_read_input_token_cost": 0.000001375, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3375,14 +4101,14 @@ }, "azure/us/gpt-4o-2024-11-20": { "deprecation_date": "2026-03-01", - "cache_creation_input_token_cost": 1.38e-6, - "input_cost_per_token": 2.75e-6, + "cache_creation_input_token_cost": 0.00000138, + "input_cost_per_token": 0.00000275, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.1e-5, + "output_cost_per_token": 0.000011, "supports_function_calling": true, 
"supports_parallel_function_calling": true, "supports_response_schema": true, @@ -3408,15 +4134,15 @@ "azure/us/gpt-4o-mini-realtime-preview-2024-12-17": { "cache_creation_input_audio_token_cost": 3.3e-7, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_audio_token": 1.1e-5, + "input_cost_per_audio_token": 0.000011, "input_cost_per_token": 6.6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2.2e-5, - "output_cost_per_token": 2.64e-6, + "output_cost_per_audio_token": 0.000022, + "output_cost_per_token": 0.00000264, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -3425,17 +4151,17 @@ "supports_tool_choice": true }, "azure/us/gpt-4o-realtime-preview-2024-10-01": { - "cache_creation_input_audio_token_cost": 2.2e-5, - "cache_read_input_token_cost": 2.75e-6, + "cache_creation_input_audio_token_cost": 0.000022, + "cache_read_input_token_cost": 0.00000275, "input_cost_per_audio_token": 0.00011, - "input_cost_per_token": 5.5e-6, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", "output_cost_per_audio_token": 0.00022, - "output_cost_per_token": 2.2e-5, + "output_cost_per_token": 0.000022, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -3444,19 +4170,25 @@ "supports_tool_choice": true }, "azure/us/gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_audio_token_cost": 2.5e-6, - "cache_read_input_token_cost": 2.75e-6, - "input_cost_per_audio_token": 4.4e-5, - "input_cost_per_token": 5.5e-6, + "cache_read_input_audio_token_cost": 0.0000025, + "cache_read_input_token_cost": 0.00000275, + "input_cost_per_audio_token": 0.000044, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 4096, 
"max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2.2e-5, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.000022, + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -3466,16 +4198,25 @@ }, "azure/us/gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.375e-7, - "input_cost_per_token": 1.375e-6, + "input_cost_per_token": 0.000001375, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000011, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3495,10 +4236,19 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.2e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.0000022, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3519,9 
+4269,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4.4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3535,16 +4294,26 @@ }, "azure/us/gpt-5.1": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.000011, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3558,16 +4327,26 @@ }, "azure/us/gpt-5.1-chat": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.000011, + "supported_endpoints": [ + 
"/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3581,16 +4360,23 @@ }, "azure/us/gpt-5.1-codex": { "cache_read_input_token_cost": 1.4e-7, - "input_cost_per_token": 1.38e-6, + "input_cost_per_token": 0.00000138, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1.1e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000011, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3610,10 +4396,17 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2.2e-6, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.0000022, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3626,14 +4419,14 @@ "supports_vision": true }, "azure/us/o1-2024-12-17": { - "cache_read_input_token_cost": 8.25e-6, - "input_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 0.00000825, + "input_cost_per_token": 0.0000165, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - 
"output_cost_per_token": 6.6e-5, + "output_cost_per_token": 0.000066, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3642,29 +4435,29 @@ }, "azure/us/o1-mini-2024-09-12": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "input_cost_per_token_batches": 6.05e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.84e-6, - "output_cost_per_token_batches": 2.42e-6, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, "supports_vision": false }, "azure/us/o1-preview-2024-09-12": { - "cache_read_input_token_cost": 8.25e-6, - "input_cost_per_token": 1.65e-5, + "cache_read_input_token_cost": 0.00000825, + "input_cost_per_token": 0.0000165, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6.6e-5, + "output_cost_per_token": 0.000066, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -3673,16 +4466,25 @@ "azure/us/o3-2025-04-16": { "deprecation_date": "2026-04-16", "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 2.2e-6, + "input_cost_per_token": 0.0000022, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8.8e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.0000088, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + 
"supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3693,15 +4495,15 @@ }, "azure/us/o3-mini-2025-01-31": { "cache_read_input_token_cost": 6.05e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "input_cost_per_token_batches": 6.05e-7, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.84e-6, - "output_cost_per_token_batches": 2.42e-6, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, @@ -3709,13 +4511,13 @@ }, "azure/us/o4-mini-2025-04-16": { "cache_read_input_token_cost": 3.1e-7, - "input_cost_per_token": 1.21e-6, + "input_cost_per_token": 0.00000121, "litellm_provider": "azure", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.84e-6, + "output_cost_per_token": 0.00000484, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3736,7 +4538,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", "supports_embedding_image_input": true @@ -3747,7 +4549,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024, "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", "supports_embedding_image_input": true @@ -3757,14 
+4559,18 @@ "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure_ai/FLUX.1-Kontext-pro": { "litellm_provider": "azure_ai", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure_ai/Llama-3.2-11B-Vision-Instruct": { "input_cost_per_token": 3.7e-7, @@ -3780,13 +4586,13 @@ "supports_vision": true }, "azure_ai/Llama-3.2-90B-Vision-Instruct": { - "input_cost_per_token": 2.04e-6, + "input_cost_per_token": 0.00000204, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 2.04e-6, + "output_cost_per_token": 0.00000204, "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview", "supports_function_calling": true, "supports_tool_choice": true, @@ -3805,7 +4611,7 @@ "supports_tool_choice": true }, "azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8": { - "input_cost_per_token": 1.41e-6, + "input_cost_per_token": 0.00000141, "litellm_provider": "azure_ai", "max_input_tokens": 1000000, "max_output_tokens": 16384, @@ -3831,7 +4637,7 @@ "supports_vision": true }, "azure_ai/Meta-Llama-3-70B-Instruct": { - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "azure_ai", "max_input_tokens": 8192, "max_output_tokens": 2048, @@ -3841,24 +4647,24 @@ "supports_tool_choice": true }, "azure_ai/Meta-Llama-3.1-405B-Instruct": { - "input_cost_per_token": 5.33e-6, + 
"input_cost_per_token": 0.00000533, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 1.6e-5, + "output_cost_per_token": 0.000016, "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice", "supports_tool_choice": true }, "azure_ai/Meta-Llama-3.1-70B-Instruct": { - "input_cost_per_token": 2.68e-6, + "input_cost_per_token": 0.00000268, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 3.54e-6, + "output_cost_per_token": 0.00000354, "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice", "supports_tool_choice": true }, @@ -4006,7 +4812,7 @@ "supports_function_calling": true }, "azure_ai/Phi-4-multimodal-instruct": { - "input_cost_per_audio_token": 4e-6, + "input_cost_per_audio_token": 0.000004, "input_cost_per_token": 8e-8, "litellm_provider": "azure_ai", "max_input_tokens": 131072, @@ -4045,108 +4851,116 @@ }, "azure_ai/mistral-document-ai-2505": { "litellm_provider": "azure_ai", - "ocr_cost_per_page": 3e-3, + "ocr_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://devblogs.microsoft.com/foundry/whats-new-in-azure-ai-foundry-august-2025/#mistral-document-ai-(ocr)-%E2%80%94-serverless-in-foundry" }, "azure_ai/doc-intelligence/prebuilt-read": { "litellm_provider": "azure_ai", - "ocr_cost_per_page": 1.5e-3, + "ocr_cost_per_page": 0.0015, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-layout": { "litellm_provider": "azure_ai", - "ocr_cost_per_page": 1e-2, + 
"ocr_cost_per_page": 0.01, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-document": { "litellm_provider": "azure_ai", - "ocr_cost_per_page": 1e-2, + "ocr_cost_per_page": 0.01, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/MAI-DS-R1": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", "supports_reasoning": true, "supports_tool_choice": true }, "azure_ai/cohere-rerank-v3-english": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "azure_ai", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure_ai/cohere-rerank-v3-multilingual": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "azure_ai", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "azure_ai/cohere-rerank-v3.5": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "azure_ai", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + 
"output_cost_per_token": 0 }, "azure_ai/deepseek-r1": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367", "supports_reasoning": true, "supports_tool_choice": true }, "azure_ai/deepseek-v3": { - "input_cost_per_token": 1.14e-6, + "input_cost_per_token": 0.00000114, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.56e-6, + "output_cost_per_token": 0.00000456, "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438", "supports_tool_choice": true }, "azure_ai/deepseek-v3-0324": { - "input_cost_per_token": 1.14e-6, + "input_cost_per_token": 0.00000114, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.56e-6, + "output_cost_per_token": 0.00000456, "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438", "supports_function_calling": true, "supports_tool_choice": true @@ -4157,21 +4971,26 @@ "max_input_tokens": 128000, "max_tokens": 128000, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 3072, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image" 
+ ], "supports_embedding_image_input": true }, "azure_ai/global/grok-3": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", "supports_function_calling": true, "supports_response_schema": false, @@ -4185,7 +5004,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.27e-6, + "output_cost_per_token": 0.00000127, "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", "supports_function_calling": true, "supports_reasoning": true, @@ -4194,13 +5013,13 @@ "supports_web_search": true }, "azure_ai/grok-3": { - "input_cost_per_token": 3.3e-6, + "input_cost_per_token": 0.0000033, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", "supports_function_calling": true, "supports_response_schema": false, @@ -4214,7 +5033,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.38e-6, + "output_cost_per_token": 0.00000138, "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", "supports_function_calling": true, "supports_reasoning": true, @@ -4223,13 +5042,13 @@ "supports_web_search": true }, "azure_ai/grok-4": { - "input_cost_per_token": 5.5e-6, + "input_cost_per_token": 0.0000055, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - 
"output_cost_per_token": 2.75e-5, + "output_cost_per_token": 0.0000275, "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", "supports_function_calling": true, "supports_response_schema": true, @@ -4237,8 +5056,8 @@ "supports_web_search": true }, "azure_ai/grok-4-fast-non-reasoning": { - "input_cost_per_token": 0.43e-6, - "output_cost_per_token": 1.73e-6, + "input_cost_per_token": 4.3e-7, + "output_cost_per_token": 0.00000173, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, @@ -4250,8 +5069,8 @@ "supports_web_search": true }, "azure_ai/grok-4-fast-reasoning": { - "input_cost_per_token": 0.43e-6, - "output_cost_per_token": 1.73e-6, + "input_cost_per_token": 4.3e-7, + "output_cost_per_token": 0.00000173, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, @@ -4264,13 +5083,13 @@ "supports_web_search": true }, "azure_ai/grok-code-fast-1": { - "input_cost_per_token": 3.5e-6, + "input_cost_per_token": 0.0000035, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.75e-5, + "output_cost_per_token": 0.0000175, "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", "supports_function_calling": true, "supports_response_schema": true, @@ -4310,40 +5129,53 @@ "supports_tool_choice": true }, "azure_ai/mistral-large": { - "input_cost_per_token": 4e-6, + "input_cost_per_token": 0.000004, "litellm_provider": "azure_ai", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_function_calling": true, "supports_tool_choice": true }, "azure_ai/mistral-large-2407": { - 
"input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", "supports_function_calling": true, "supports_tool_choice": true }, "azure_ai/mistral-large-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", "supports_function_calling": true, "supports_tool_choice": true }, + "azure_ai/mistral-large-3": { + "input_cost_per_token": 5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 256000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://azure.microsoft.com/en-us/blog/introducing-mistral-large-3-in-microsoft-foundry-open-capable-and-ready-for-production-workloads/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, "azure_ai/mistral-medium-2505": { "input_cost_per_token": 4e-7, "litellm_provider": "azure_ai", @@ -4351,7 +5183,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true @@ -4368,24 +5200,24 @@ "supports_function_calling": true }, "azure_ai/mistral-small": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "azure_ai", "max_input_tokens": 32000, 
"max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true }, "azure_ai/mistral-small-2503": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true @@ -4498,43 +5330,43 @@ "supports_tool_choice": true }, "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { - "input_cost_per_token": 2.23e-6, + "input_cost_per_token": 0.00000223, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 7.55e-6, + "output_cost_per_token": 0.00000755, "supports_tool_choice": true }, "bedrock/ap-northeast-1/anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/ap-northeast-1/anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 3.18e-6, + "input_cost_per_token": 0.00000318, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.2e-6 + "output_cost_per_token": 
0.0000042 }, "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3.6e-7, @@ -4546,13 +5378,13 @@ "output_cost_per_token": 7.2e-7 }, "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 3.05e-6, + "input_cost_per_token": 0.00000305, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.03e-6 + "output_cost_per_token": 0.00000403 }, "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3.5e-7, @@ -4622,42 +5454,42 @@ "supports_tool_choice": true }, "bedrock/eu-central-1/anthropic.claude-instant-v1": { - "input_cost_per_token": 2.48e-6, + "input_cost_per_token": 0.00000248, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 8.38e-6, + "output_cost_per_token": 0.00000838, "supports_tool_choice": true }, "bedrock/eu-central-1/anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5 + "output_cost_per_token": 0.000024 }, "bedrock/eu-central-1/anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.86e-6, + "input_cost_per_token": 0.00000286, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3.78e-6 + "output_cost_per_token": 0.00000378 }, 
"bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3.2e-7, @@ -4669,13 +5501,13 @@ "output_cost_per_token": 6.5e-7 }, "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 3.45e-6, + "input_cost_per_token": 0.00000345, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4.55e-6 + "output_cost_per_token": 0.00000455 }, "bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3.9e-7, @@ -4697,13 +5529,13 @@ "supports_tool_choice": true }, "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": { - "input_cost_per_token": 1.04e-5, + "input_cost_per_token": 0.0000104, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3.12e-5, + "output_cost_per_token": 0.0000312, "supports_function_calling": true }, "bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1": { @@ -4717,7 +5549,7 @@ "supports_tool_choice": true }, "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, @@ -4726,20 +5558,20 @@ "notes": "Anthropic via Invoke route does not currently support pdf input." 
}, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true }, "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 4.45e-6, + "input_cost_per_token": 0.00000445, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5.88e-6 + "output_cost_per_token": 0.00000588 }, "bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 5e-7, @@ -4748,7 +5580,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.01e-6 + "output_cost_per_token": 0.00000101 }, "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { "input_cost_per_second": 0.011, @@ -4815,37 +5647,37 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, "supports_tool_choice": true }, "bedrock/us-east-1/anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/us-east-1/anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/us-east-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - 
"output_cost_per_token": 3.5e-6 + "output_cost_per_token": 0.0000035 }, "bedrock/us-east-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3e-7, @@ -4867,13 +5699,13 @@ "supports_tool_choice": true }, "bedrock/us-east-1/mistral.mistral-large-2402-v1:0": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1": { @@ -4893,7 +5725,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.84e-6, + "output_cost_per_token": 0.00000384, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -4906,7 +5738,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536 }, "bedrock/us-gov-east-1/amazon.titan-embed-text-v2:0": { @@ -4915,17 +5747,17 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024 }, "bedrock/us-gov-east-1/amazon.titan-text-express-v1": { - "input_cost_per_token": 1.3e-6, + "input_cost_per_token": 0.0000013, "litellm_provider": "bedrock", "max_input_tokens": 42000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1.7e-6 + "output_cost_per_token": 0.0000017 }, "bedrock/us-gov-east-1/amazon.titan-text-lite-v1": { "input_cost_per_token": 3e-7, @@ -4943,16 +5775,16 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "bedrock/us-gov-east-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3.6e-6, + "input_cost_per_token": 
0.0000036, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.8e-5, + "output_cost_per_token": 0.000018, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -4966,7 +5798,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -4974,13 +5806,13 @@ "supports_vision": true }, "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": { - "input_cost_per_token": 3.3e-6, + "input_cost_per_token": 0.0000033, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -4992,13 +5824,13 @@ "supports_vision": true }, "bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 3.5e-6, + "output_cost_per_token": 0.0000035, "supports_pdf_input": true }, "bedrock/us-gov-east-1/meta.llama3-8b-instruct-v1:0": { @@ -5008,7 +5840,7 @@ "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 2.65e-6, + "output_cost_per_token": 0.00000265, "supports_pdf_input": true }, "bedrock/us-gov-west-1/amazon.nova-pro-v1:0": { @@ -5018,7 +5850,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.84e-6, + "output_cost_per_token": 0.00000384, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ 
-5031,7 +5863,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536 }, "bedrock/us-gov-west-1/amazon.titan-embed-text-v2:0": { @@ -5040,17 +5872,17 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1024 }, "bedrock/us-gov-west-1/amazon.titan-text-express-v1": { - "input_cost_per_token": 1.3e-6, + "input_cost_per_token": 0.0000013, "litellm_provider": "bedrock", "max_input_tokens": 42000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1.7e-6 + "output_cost_per_token": 0.0000017 }, "bedrock/us-gov-west-1/amazon.titan-text-lite-v1": { "input_cost_per_token": 3e-7, @@ -5068,18 +5900,18 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "bedrock/us-gov-west-1/anthropic.claude-3-7-sonnet-20250219-v1:0": { - "cache_creation_input_token_cost": 4.5e-6, + "cache_creation_input_token_cost": 0.0000045, "cache_read_input_token_cost": 3.6e-7, - "input_cost_per_token": 3.6e-6, + "input_cost_per_token": 0.0000036, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.8e-5, + "output_cost_per_token": 0.000018, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -5091,13 +5923,13 @@ "supports_vision": true }, "bedrock/us-gov-west-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3.6e-6, + "input_cost_per_token": 0.0000036, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.8e-5, + "output_cost_per_token": 0.000018, "supports_function_calling": true, "supports_pdf_input": 
true, "supports_response_schema": true, @@ -5111,7 +5943,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -5119,13 +5951,13 @@ "supports_vision": true }, "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": { - "input_cost_per_token": 3.3e-6, + "input_cost_per_token": 0.0000033, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -5137,13 +5969,13 @@ "supports_vision": true }, "bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8000, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 3.5e-6, + "output_cost_per_token": 0.0000035, "supports_pdf_input": true }, "bedrock/us-gov-west-1/meta.llama3-8b-instruct-v1:0": { @@ -5153,17 +5985,17 @@ "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 2.65e-6, + "output_cost_per_token": 0.00000265, "supports_pdf_input": true }, "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3.5e-6 + "output_cost_per_token": 0.0000035 }, "bedrock/us-west-1/meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3e-7, @@ -5239,27 +6071,27 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, 
"supports_tool_choice": true }, "bedrock/us-west-2/anthropic.claude-v1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/us-west-2/anthropic.claude-v2:1": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": { @@ -5273,13 +6105,13 @@ "supports_tool_choice": true }, "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { @@ -5293,7 +6125,7 @@ "supports_tool_choice": true }, "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 8e-7, "litellm_provider": "bedrock", @@ -5301,7 +6133,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -5316,7 +6148,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "supports_function_calling": true, 
"supports_tool_choice": true }, @@ -5445,13 +6277,13 @@ "output_cost_per_token": 5e-7 }, "chatgpt-4o-latest": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -5461,8 +6293,8 @@ "supports_vision": true }, "claude-3-5-haiku-20241022": { - "cache_creation_input_token_cost": 1e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.000001, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 8e-8, "deprecation_date": "2025-10-01", "input_cost_per_token": 8e-7, @@ -5471,7 +6303,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5488,17 +6320,17 @@ "tool_use_system_prompt_tokens": 264 }, "claude-3-5-haiku-latest": { - "cache_creation_input_token_cost": 1.25e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 1e-7, "deprecation_date": "2025-10-01", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5515,16 +6347,16 @@ "tool_use_system_prompt_tokens": 264 }, "claude-haiku-4-5-20251001": { - "cache_creation_input_token_cost": 1.25e-6, - 
"cache_creation_input_token_cost_above_1hr": 2e-6, + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000002, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_computer_use": true, @@ -5536,16 +6368,16 @@ "supports_vision": true }, "claude-haiku-4-5": { - "cache_creation_input_token_cost": 1.25e-6, - "cache_creation_input_token_cost_above_1hr": 2e-6, + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000002, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_computer_use": true, @@ -5557,17 +6389,17 @@ "supports_vision": true }, "claude-3-5-sonnet-20240620": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-06-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -5578,17 +6410,17 @@ 
"tool_use_system_prompt_tokens": 159 }, "claude-3-5-sonnet-20241022": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-10-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5606,17 +6438,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-3-5-sonnet-latest": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-06-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5634,17 +6466,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-3-7-sonnet-20250219": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2026-02-19", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 
128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5663,17 +6495,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-3-7-sonnet-latest": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-06-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5692,7 +6524,7 @@ }, "claude-3-haiku-20240307": { "cache_creation_input_token_cost": 3e-7, - "cache_creation_input_token_cost_above_1hr": 6e-6, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-8, "input_cost_per_token": 2.5e-7, "litellm_provider": "anthropic", @@ -5700,7 +6532,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -5710,17 +6542,17 @@ "tool_use_system_prompt_tokens": 264 }, "claude-3-opus-20240229": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 6e-6, - "cache_read_input_token_cost": 1.5e-6, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.000006, + "cache_read_input_token_cost": 0.0000015, "deprecation_date": "2026-05-01", - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, 
"litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -5730,17 +6562,17 @@ "tool_use_system_prompt_tokens": 395 }, "claude-3-opus-latest": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 6e-6, - "cache_read_input_token_cost": 1.5e-6, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.000006, + "cache_read_input_token_cost": 0.0000015, "deprecation_date": "2025-03-01", - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -5750,15 +6582,15 @@ "tool_use_system_prompt_tokens": 395 }, "claude-4-opus-20250514": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5776,19 +6608,19 @@ "tool_use_system_prompt_tokens": 159 }, "claude-4-sonnet-20250514": { - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_creation_input_token_cost": 0.00000375, + 
"cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, "litellm_provider": "anthropic", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_above_200k_tokens": 2.25e-5, + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5806,19 +6638,19 @@ "tool_use_system_prompt_tokens": 159 }, "claude-sonnet-4-5": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5836,19 +6668,19 @@ "tool_use_system_prompt_tokens": 346 }, "claude-sonnet-4-5-20250929": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - 
"input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5867,19 +6699,19 @@ "tool_use_system_prompt_tokens": 346 }, "claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -5892,16 +6724,16 @@ "tool_use_system_prompt_tokens": 159 }, "claude-opus-4-1": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 3e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + 
"cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5919,17 +6751,17 @@ "tool_use_system_prompt_tokens": 159 }, "claude-opus-4-1-20250805": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 3e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "deprecation_date": "2026-08-05", "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5947,17 +6779,71 @@ "tool_use_system_prompt_tokens": 159 }, "claude-opus-4-20250514": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 3e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "deprecation_date": "2026-05-14", "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, + "search_context_cost_per_query": { + 
"search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-5-20251101": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -5976,20 +6862,20 @@ }, "claude-sonnet-4-20250514": { "deprecation_date": "2026-05-14", - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_1hr": 6e-6, + 
"cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -6007,40 +6893,40 @@ "tool_use_system_prompt_tokens": 159 }, "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { - "input_cost_per_token": 1.923e-6, + "input_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "max_input_tokens": 3072, "max_output_tokens": 3072, "max_tokens": 3072, "mode": "chat", - "output_cost_per_token": 1.923e-6 + "output_cost_per_token": 0.000001923 }, "cloudflare/@cf/meta/llama-2-7b-chat-int8": { - "input_cost_per_token": 1.923e-6, + "input_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "max_input_tokens": 2048, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 1.923e-6 + "output_cost_per_token": 0.000001923 }, "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { - "input_cost_per_token": 1.923e-6, + "input_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.923e-6 + "output_cost_per_token": 0.000001923 }, "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { 
- "input_cost_per_token": 1.923e-6, + "input_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.923e-6 + "output_cost_per_token": 0.000001923 }, "code-bison": { "input_cost_per_character": 2.5e-7, @@ -6222,41 +7108,48 @@ "supports_tool_choice": true }, "codestral/codestral-2405": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "codestral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://docs.mistral.ai/capabilities/code_generation/", "supports_assistant_prefill": true, "supports_tool_choice": true }, "codestral/codestral-latest": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "codestral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://docs.mistral.ai/capabilities/code_generation/", "supports_assistant_prefill": true, "supports_tool_choice": true }, "codex-mini-latest": { "cache_read_input_token_cost": 3.75e-7, - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 6e-6, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000006, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -6278,13 +7171,13 @@ "supports_tool_choice": true }, 
"cohere.command-r-plus-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_tool_choice": true }, "cohere.command-r-v1:0": { @@ -6294,17 +7187,17 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_tool_choice": true }, "cohere.command-text-v14": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "bedrock", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_tool_choice": true }, "cohere.embed-english-v3": { @@ -6313,7 +7206,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_embedding_image_input": true }, "cohere.embed-multilingual-v3": { @@ -6322,7 +7215,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_embedding_image_input": true }, "cohere.embed-v4:0": { @@ -6331,7 +7224,7 @@ "max_input_tokens": 128000, "max_tokens": 128000, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536, "supports_embedding_image_input": true }, @@ -6341,13 +7234,13 @@ "max_input_tokens": 128000, "max_tokens": 128000, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536, "supports_embedding_image_input": true }, "cohere.rerank-v3-5:0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "bedrock", "max_document_chunks_per_query": 100, "max_input_tokens": 32000, 
@@ -6356,25 +7249,25 @@ "max_tokens": 32000, "max_tokens_per_document_chunk": 512, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "command": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "command-a-03-2025": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "cohere_chat", "max_input_tokens": 256000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true }, @@ -6389,13 +7282,13 @@ "supports_tool_choice": true }, "command-nightly": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "command-r": { "input_cost_per_token": 1.5e-7, @@ -6420,24 +7313,24 @@ "supports_tool_choice": true }, "command-r-plus": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "cohere_chat", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true }, "command-r-plus-08-2024": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "cohere_chat", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true }, @@ -6454,16 +7347,23 @@ 
"supports_tool_choice": true }, "computer-use-preview": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "azure", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 1.2e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000012, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -6481,9 +7381,11 @@ "max_output_tokens": 8192, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.7e-6, + "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": ["/v1/chat/completions"], + "supported_endpoints": [ + "/v1/chat/completions" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -6500,9 +7402,11 @@ "max_output_tokens": 65536, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.7e-6, + "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": ["/v1/chat/completions"], + "supported_endpoints": [ + "/v1/chat/completions" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -6519,7 +7423,7 @@ "max_output_tokens": 16384, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6539,12 +7443,18 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - 
"range": [0, 256000.0] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 2.5e-7, - "output_cost_per_token": 2e-6, - "range": [256000.0, 1000000.0] + "output_cost_per_token": 0.000002, + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6562,23 +7472,29 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [0, 256000.0] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 2.5e-7, - "output_cost_per_token": 2e-6, - "range": [256000.0, 1000000.0] + "output_cost_per_token": 0.000002, + "range": [ + 256000, + 1000000 + ] } ] }, "dashscope/qwen-max": { - "input_cost_per_token": 1.6e-6, + "input_cost_per_token": 0.0000016, "litellm_provider": "dashscope", "max_input_tokens": 30720, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6.4e-6, + "output_cost_per_token": 0.0000064, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6591,7 +7507,7 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6604,7 +7520,7 @@ "max_output_tokens": 8192, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6617,8 +7533,8 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6631,8 +7547,8 @@ 
"max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, @@ -6651,15 +7567,21 @@ "tiered_pricing": [ { "input_cost_per_token": 4e-7, - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, - "range": [0, 256000.0] + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, + "range": [ + 0, + 256000 + ] }, { - "input_cost_per_token": 1.2e-6, - "output_cost_per_reasoning_token": 1.2e-5, - "output_cost_per_token": 3.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000012, + "output_cost_per_reasoning_token": 0.000012, + "output_cost_per_token": 0.0000036, + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6676,15 +7598,21 @@ "tiered_pricing": [ { "input_cost_per_token": 4e-7, - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, - "range": [0, 256000.0] + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, + "range": [ + 0, + 256000 + ] }, { - "input_cost_per_token": 1.2e-6, - "output_cost_per_reasoning_token": 1.2e-5, - "output_cost_per_token": 3.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000012, + "output_cost_per_reasoning_token": 0.000012, + "output_cost_per_token": 0.0000036, + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6701,15 +7629,21 @@ "tiered_pricing": [ { "input_cost_per_token": 4e-7, - "output_cost_per_reasoning_token": 4e-6, - "output_cost_per_token": 1.2e-6, - "range": [0, 256000.0] + "output_cost_per_reasoning_token": 0.000004, + "output_cost_per_token": 0.0000012, + "range": [ + 0, + 256000 + ] }, { - "input_cost_per_token": 1.2e-6, - "output_cost_per_reasoning_token": 1.2e-5, - "output_cost_per_token": 
3.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000012, + "output_cost_per_reasoning_token": 0.000012, + "output_cost_per_token": 0.0000036, + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6793,26 +7727,38 @@ { "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 3e-7, - "output_cost_per_token": 1.5e-6, - "range": [0, 32000.0] + "output_cost_per_token": 0.0000015, + "range": [ + 0, + 32000 + ] }, { "cache_read_input_token_cost": 1.2e-7, "input_cost_per_token": 5e-7, - "output_cost_per_token": 2.5e-6, - "range": [32000.0, 128000.0] + "output_cost_per_token": 0.0000025, + "range": [ + 32000, + 128000 + ] }, { "cache_read_input_token_cost": 2e-7, "input_cost_per_token": 8e-7, - "output_cost_per_token": 4e-6, - "range": [128000.0, 256000.0] + "output_cost_per_token": 0.000004, + "range": [ + 128000, + 256000 + ] }, { "cache_read_input_token_cost": 4e-7, - "input_cost_per_token": 1.6e-6, - "output_cost_per_token": 9.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000016, + "output_cost_per_token": 0.0000096, + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6829,23 +7775,35 @@ "tiered_pricing": [ { "input_cost_per_token": 3e-7, - "output_cost_per_token": 1.5e-6, - "range": [0, 32000.0] + "output_cost_per_token": 0.0000015, + "range": [ + 0, + 32000 + ] }, { "input_cost_per_token": 5e-7, - "output_cost_per_token": 2.5e-6, - "range": [32000.0, 128000.0] + "output_cost_per_token": 0.0000025, + "range": [ + 32000, + 128000 + ] }, { "input_cost_per_token": 8e-7, - "output_cost_per_token": 4e-6, - "range": [128000.0, 256000.0] + "output_cost_per_token": 0.000004, + "range": [ + 128000, + 256000 + ] }, { - "input_cost_per_token": 1.6e-6, - "output_cost_per_token": 9.6e-6, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.0000016, + "output_cost_per_token": 0.0000096, + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6862,27 +7820,39 @@ "tiered_pricing": [ { "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 
1e-6, - "output_cost_per_token": 5e-6, - "range": [0, 32000.0] + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "range": [ + 0, + 32000 + ] }, { "cache_read_input_token_cost": 1.8e-7, - "input_cost_per_token": 1.8e-6, - "output_cost_per_token": 9e-6, - "range": [32000.0, 128000.0] + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.000009, + "range": [ + 32000, + 128000 + ] }, { "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "output_cost_per_token": 1.5e-5, - "range": [128000.0, 256000.0] + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [ + 128000, + 256000 + ] }, { "cache_read_input_token_cost": 6e-7, - "input_cost_per_token": 6e-6, - "output_cost_per_token": 6e-5, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.000006, + "output_cost_per_token": 0.00006, + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6898,24 +7868,36 @@ "supports_tool_choice": true, "tiered_pricing": [ { - "input_cost_per_token": 1e-6, - "output_cost_per_token": 5e-6, - "range": [0, 32000.0] + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "range": [ + 0, + 32000 + ] }, { - "input_cost_per_token": 1.8e-6, - "output_cost_per_token": 9e-6, - "range": [32000.0, 128000.0] + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.000009, + "range": [ + 32000, + 128000 + ] }, { - "input_cost_per_token": 3e-6, - "output_cost_per_token": 1.5e-5, - "range": [128000.0, 256000.0] + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [ + 128000, + 256000 + ] }, { - "input_cost_per_token": 6e-6, - "output_cost_per_token": 6e-5, - "range": [256000.0, 1000000.0] + "input_cost_per_token": 0.000006, + "output_cost_per_token": 0.00006, + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6931,72 +7913,353 @@ "supports_tool_choice": true, "tiered_pricing": [ { - "input_cost_per_token": 1.2e-6, - "output_cost_per_token": 6e-6, - "range": [0, 
32000.0] + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.000006, + "range": [ + 0, + 32000 + ] }, { - "input_cost_per_token": 2.4e-6, - "output_cost_per_token": 1.2e-5, - "range": [32000.0, 128000.0] + "input_cost_per_token": 0.0000024, + "output_cost_per_token": 0.000012, + "range": [ + 32000, + 128000 + ] }, { - "input_cost_per_token": 3e-6, - "output_cost_per_token": 1.5e-5, - "range": [128000.0, 252000.0] + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [ + 128000, + 252000 + ] } ] }, - "dashscope/qwq-plus": { - "input_cost_per_token": 8e-7, - "litellm_provider": "dashscope", - "max_input_tokens": 98304, - "max_output_tokens": 8192, - "max_tokens": 131072, + "dashscope/qwq-plus": { + "input_cost_per_token": 8e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 98304, + "max_output_tokens": 8192, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0000024, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-bge-large-en": { + "input_cost_per_token": 1.0003e-7, + "input_dbu_cost_per_token": 0.000001429, + "litellm_provider": "databricks", + "max_input_tokens": 512, + "max_tokens": 512, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "embedding", + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, + "output_vector_size": 1024, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-claude-3-7-sonnet": { + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-haiku-4-5": { + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.00000500003, + "output_dbu_cost_per_token": 0.000071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4": { + "input_cost_per_token": 0.000015000020000000002, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.00007500003000000001, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-1": { + "input_cost_per_token": 0.000015000020000000002, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.00007500003000000001, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-5": { + "input_cost_per_token": 0.00000500003, + "input_dbu_cost_per_token": 0.000071429, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.000025000010000000002, + "output_dbu_cost_per_token": 0.000357143, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4": { + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-1": { + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-5": { + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-flash": { + "input_cost_per_token": 3.0001999999999996e-7, + "input_dbu_cost_per_token": 0.000004285999999999999, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 1048576, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.00000249998, + "output_dbu_cost_per_token": 0.000035714, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-pro": { + "input_cost_per_token": 0.00000124999, + "input_dbu_cost_per_token": 0.000017857, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 1048576, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, "mode": "chat", - "output_cost_per_token": 2.4e-6, - "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "output_cost_per_token": 0.000009999990000000002, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", "supports_function_calling": true, - "supports_reasoning": true, "supports_tool_choice": true }, - "databricks/databricks-bge-large-en": { - "input_cost_per_token": 1.0003e-7, - "input_dbu_cost_per_token": 1.429e-6, + "databricks/databricks-gemma-3-12b": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 0.0000021429999999999996, "litellm_provider": "databricks", - "max_input_tokens": 512, - "max_tokens": 512, + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 128000, "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, - "mode": "embedding", - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, - "output_vector_size": 1024, + "mode": "chat", + "output_cost_per_token": 5.0001e-7, + "output_dbu_cost_per_token": 0.000007143, "source": "https://www.databricks.com/product/pricing/foundation-model-serving" }, - "databricks/databricks-claude-3-7-sonnet": { - "input_cost_per_token": 2.5e-6, - "input_dbu_cost_per_token": 3.571e-5, + "databricks/databricks-gpt-5": { + "input_cost_per_token": 0.00000124999, + "input_dbu_cost_per_token": 0.000017857, "litellm_provider": "databricks", - "max_input_tokens": 200000, + "max_input_tokens": 400000, "max_output_tokens": 128000, - "max_tokens": 200000, + "max_tokens": 400000, "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 1.7857e-5, - "output_db_cost_per_token": 0.000214286, - "source": "https://www.databricks.com/product/pricing/foundation-model-serving", - "supports_assistant_prefill": true, - "supports_function_calling": true, - "supports_reasoning": true, - "supports_tool_choice": true + "output_cost_per_token": 0.000009999990000000002, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-1": { + "input_cost_per_token": 0.00000124999, + "input_dbu_cost_per_token": 0.000017857, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.000009999990000000002, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-mini": { + "input_cost_per_token": 2.4997000000000006e-7, + "input_dbu_cost_per_token": 0.000003571, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.0000019999700000000004, + "output_dbu_cost_per_token": 0.000028571, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-nano": { + "input_cost_per_token": 4.998e-8, + "input_dbu_cost_per_token": 7.14e-7, + "litellm_provider": "databricks", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 3.9998000000000007e-7, + "output_dbu_cost_per_token": 0.000005714000000000001, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-oss-120b": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 0.0000021429999999999996, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 5.9997e-7, + "output_dbu_cost_per_token": 0.000008571, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-oss-20b": { + "input_cost_per_token": 7e-8, + "input_dbu_cost_per_token": 0.000001, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 3.0001999999999996e-7, + "output_dbu_cost_per_token": 0.000004285999999999999, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" }, "databricks/databricks-gte-large-en": { - "input_cost_per_token": 1.2999e-7, - "input_dbu_cost_per_token": 1.857e-6, + "input_cost_per_token": 1.2999000000000001e-7, + "input_dbu_cost_per_token": 0.000001857, "litellm_provider": "databricks", "max_input_tokens": 8192, "max_tokens": 8192, @@ -7004,14 +8267,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "embedding", - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, "output_vector_size": 1024, "source": "https://www.databricks.com/product/pricing/foundation-model-serving" }, "databricks/databricks-llama-2-70b-chat": { "input_cost_per_token": 5.0001e-7, - "input_dbu_cost_per_token": 7.143e-6, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 4096, "max_output_tokens": 4096, @@ -7020,14 +8283,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 1.5e-6, - "output_dbu_cost_per_token": 2.1429e-5, + "output_cost_per_token": 0.0000015000300000000002, + "output_dbu_cost_per_token": 0.000021429, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-llama-4-maverick": { - "input_cost_per_token": 5e-6, - "input_dbu_cost_per_token": 7.143e-5, + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7036,14 +8299,14 @@ "notes": "Databricks documentation now provides both DBU costs (_dbu_cost_per_token) and dollar costs(_cost_per_token)." }, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_dbu_cost_per_token": 0.00021429, + "output_cost_per_token": 0.0000015000300000000002, + "output_dbu_cost_per_token": 0.000021429, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-meta-llama-3-1-405b-instruct": { - "input_cost_per_token": 5e-6, - "input_dbu_cost_per_token": 7.1429e-5, + "input_cost_per_token": 0.00000500003, + "input_dbu_cost_per_token": 0.000071429, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7052,14 +8315,29 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 1.500002e-5, - "output_db_cost_per_token": 0.000214286, + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, + "databricks/databricks-meta-llama-3-1-8b-instruct": { + "input_cost_per_token": 1.5000999999999998e-7, + "input_dbu_cost_per_token": 0.0000021429999999999996, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 200000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 4.5003000000000007e-7, + "output_dbu_cost_per_token": 0.000006429000000000001, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, "databricks/databricks-meta-llama-3-3-70b-instruct": { - "input_cost_per_token": 1.00002e-6, - "input_dbu_cost_per_token": 1.4286e-5, + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7068,14 +8346,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 2.99999e-6, - "output_dbu_cost_per_token": 4.2857e-5, + "output_cost_per_token": 0.0000015000300000000002, + "output_dbu_cost_per_token": 0.000021429, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-meta-llama-3-70b-instruct": { - "input_cost_per_token": 1.00002e-6, - "input_dbu_cost_per_token": 1.4286e-5, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -7084,14 +8362,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 2.99999e-6, - "output_dbu_cost_per_token": 4.2857e-5, + "output_cost_per_token": 0.0000029999900000000002, + "output_dbu_cost_per_token": 0.000042857, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-mixtral-8x7b-instruct": { "input_cost_per_token": 5.0001e-7, - "input_dbu_cost_per_token": 7.143e-6, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 4096, "max_output_tokens": 4096, @@ -7100,14 +8378,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 9.9902e-7, - "output_dbu_cost_per_token": 1.4286e-5, + "output_cost_per_token": 0.00000100002, + "output_dbu_cost_per_token": 0.000014286, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-mpt-30b-instruct": { - "input_cost_per_token": 9.9902e-7, - "input_dbu_cost_per_token": 1.4286e-5, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, "litellm_provider": "databricks", "max_input_tokens": 8192, "max_output_tokens": 8192, @@ -7116,14 +8394,14 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, "mode": "chat", - "output_cost_per_token": 9.9902e-7, - "output_dbu_cost_per_token": 1.4286e-5, + "output_cost_per_token": 0.00000100002, + "output_dbu_cost_per_token": 0.000014286, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, "databricks/databricks-mpt-7b-instruct": { "input_cost_per_token": 5.0001e-7, - "input_dbu_cost_per_token": 7.143e-6, + "input_dbu_cost_per_token": 0.000007143, "litellm_provider": "databricks", "max_input_tokens": 8192, "max_output_tokens": 8192, @@ -7132,8 +8410,8 @@ "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
}, "mode": "chat", - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, @@ -7143,13 +8421,13 @@ "mode": "search" }, "davinci-002": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", "max_input_tokens": 16384, "max_output_tokens": 4096, "max_tokens": 16384, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "deepgram/base": { "input_cost_per_second": 0.00020833, @@ -7159,9 +8437,11 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-conversationalai": { "input_cost_per_second": 0.00020833, @@ -7171,9 +8451,11 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-finance": { "input_cost_per_second": 0.00020833, @@ -7183,9 +8465,11 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-general": { "input_cost_per_second": 0.00020833, @@ -7195,9 +8479,11 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": 
"https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-meeting": { "input_cost_per_second": 0.00020833, @@ -7207,9 +8493,11 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-phonecall": { "input_cost_per_second": 0.00020833, @@ -7219,9 +8507,11 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-video": { "input_cost_per_second": 0.00020833, @@ -7231,9 +8521,11 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-voicemail": { "input_cost_per_second": 0.00020833, @@ -7243,9 +8535,11 @@ "original_pricing_per_minute": 0.0125 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced": { "input_cost_per_second": 0.00024167, @@ -7255,9 +8549,11 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + 
"supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-finance": { "input_cost_per_second": 0.00024167, @@ -7267,9 +8563,11 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-general": { "input_cost_per_second": 0.00024167, @@ -7279,9 +8577,11 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-meeting": { "input_cost_per_second": 0.00024167, @@ -7291,9 +8591,11 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-phonecall": { "input_cost_per_second": 0.00024167, @@ -7303,213 +8605,249 @@ "original_pricing_per_minute": 0.0145 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - 
"supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-atc": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-automotive": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-conversationalai": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": 
"https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-drivethru": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-finance": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-general": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-meeting": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + 
"output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-phonecall": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-video": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-voicemail": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - 
"output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3-general": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3-medical": { - "input_cost_per_second": 8.667e-5, + "input_cost_per_second": 0.00008667, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0052/60 seconds = $0.00008667 per second (multilingual)", "original_pricing_per_minute": 0.0052 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-general": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", "original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-phonecall": { - "input_cost_per_second": 7.167e-5, + "input_cost_per_second": 0.00007167, "litellm_provider": "deepgram", "metadata": { "calculation": "$0.0043/60 seconds = $0.00007167 per second", 
"original_pricing_per_minute": 0.0043 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper": { "input_cost_per_second": 0.0001, @@ -7518,9 +8856,11 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-base": { "input_cost_per_second": 0.0001, @@ -7529,9 +8869,11 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-large": { "input_cost_per_second": 0.0001, @@ -7540,9 +8882,11 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-medium": { "input_cost_per_second": 0.0001, @@ -7551,9 +8895,11 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + 
"supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-small": { "input_cost_per_second": 0.0001, @@ -7562,9 +8908,11 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-tiny": { "input_cost_per_second": 0.0001, @@ -7573,9 +8921,11 @@ "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepinfra/Gryphe/MythoMax-L2-13b": { "max_tokens": 4096, @@ -7591,8 +8941,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1e-6, - "output_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7683,7 +9033,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 3e-7, - "output_cost_per_token": 2.9e-6, + "output_cost_per_token": 0.0000029, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7713,7 +9063,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 4e-7, - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7723,7 +9073,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 2.9e-7, - "output_cost_per_token": 1.2e-6, + 
"output_cost_per_token": 0.0000012, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7733,7 +9083,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 1.4e-7, - "output_cost_per_token": 1.4e-6, + "output_cost_per_token": 0.0000014, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7743,7 +9093,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 1.4e-7, - "output_cost_per_token": 1.4e-6, + "output_cost_per_token": 0.0000014, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7783,7 +9133,7 @@ "max_input_tokens": 16384, "max_output_tokens": 16384, "input_cost_per_token": 2.7e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": false @@ -7792,8 +9142,8 @@ "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "input_cost_per_token": 3.3e-6, - "output_cost_per_token": 1.65e-5, + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, "cache_read_input_token_cost": 3.3e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -7803,8 +9153,8 @@ "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "input_cost_per_token": 1.65e-5, - "output_cost_per_token": 8.25e-5, + "input_cost_per_token": 0.0000165, + "output_cost_per_token": 0.0000825, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7813,8 +9163,8 @@ "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "input_cost_per_token": 3.3e-6, - "output_cost_per_token": 1.65e-5, + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7824,7 +9174,7 @@ "max_input_tokens": 163840, "max_output_tokens": 163840, "input_cost_per_token": 7e-7, - 
"output_cost_per_token": 2.4e-6, + "output_cost_per_token": 0.0000024, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7834,7 +9184,7 @@ "max_input_tokens": 163840, "max_output_tokens": 163840, "input_cost_per_token": 5e-7, - "output_cost_per_token": 2.15e-6, + "output_cost_per_token": 0.00000215, "cache_read_input_token_cost": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -7844,8 +9194,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1e-6, - "output_cost_per_token": 3e-6, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7874,8 +9224,8 @@ "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "input_cost_per_token": 1e-6, - "output_cost_per_token": 3e-6, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7905,7 +9255,7 @@ "max_input_tokens": 163840, "max_output_tokens": 163840, "input_cost_per_token": 2.7e-7, - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "cache_read_input_token_cost": 2.16e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -7917,7 +9267,7 @@ "max_input_tokens": 163840, "max_output_tokens": 163840, "input_cost_per_token": 2.7e-7, - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "cache_read_input_token_cost": 2.16e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -7938,7 +9288,7 @@ "max_input_tokens": 1000000, "max_output_tokens": 1000000, "input_cost_per_token": 3e-7, - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -7947,8 +9297,8 @@ "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "input_cost_per_token": 1.25e-6, - 
"output_cost_per_token": 1e-5, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -8178,7 +9528,7 @@ "max_input_tokens": 131072, "max_output_tokens": 131072, "input_cost_per_token": 5e-7, - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -8188,7 +9538,7 @@ "max_input_tokens": 262144, "max_output_tokens": 262144, "input_cost_per_token": 5e-7, - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "cache_read_input_token_cost": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", @@ -8249,13 +9599,13 @@ "max_input_tokens": 131072, "max_output_tokens": 131072, "input_cost_per_token": 4e-7, - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true }, "deepseek/deepseek-chat": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 7e-8, "input_cost_per_token": 2.7e-7, "input_cost_per_token_cache_hit": 7e-8, @@ -8264,7 +9614,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.1e-6, + "output_cost_per_token": 0.0000011, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -8292,7 +9642,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -8307,7 +9657,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -8315,7 +9665,7 
@@ "supports_tool_choice": true }, "deepseek/deepseek-v3": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 7e-8, "input_cost_per_token": 2.7e-7, "input_cost_per_token_cache_hit": 7e-8, @@ -8324,10 +9674,25 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.1e-6, + "output_cost_per_token": 0.0000011, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-v3.2": { + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, + "litellm_provider": "deepseek", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-7, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_tool_choice": true }, "deepseek.v3-v1:0": { @@ -8337,7 +9702,7 @@ "max_output_tokens": 81920, "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 1.68e-6, + "output_cost_per_token": 0.00000168, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true @@ -8352,7 +9717,7 @@ "output_cost_per_token": 5e-7 }, "doubao-embedding": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8360,11 +9725,11 @@ "notes": "Volcengine Doubao embedding model - standard version with 2560 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 2560 }, "doubao-embedding-large": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8372,11 +9737,11 @@ "notes": "Volcengine Doubao embedding model - large version with 2048 dimensions" }, 
"mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 2048 }, "doubao-embedding-large-text-240915": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8384,11 +9749,11 @@ "notes": "Volcengine Doubao embedding model - text-240915 version with 4096 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 4096 }, "doubao-embedding-large-text-250515": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8396,11 +9761,11 @@ "notes": "Volcengine Doubao embedding model - text-250515 version with 2048 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 2048 }, "doubao-embedding-text-240715": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "volcengine", "max_input_tokens": 4096, "max_tokens": 4096, @@ -8408,7 +9773,7 @@ "notes": "Volcengine Doubao embedding model - text-240715 version with 2560 dimensions" }, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 2560 }, "exa_ai/search": { @@ -8416,12 +9781,18 @@ "mode": "search", "tiered_pricing": [ { - "input_cost_per_query": 5e-3, - "max_results_range": [0, 25] + "input_cost_per_query": 0.005, + "max_results_range": [ + 0, + 25 + ] }, { - "input_cost_per_query": 25e-3, - "max_results_range": [26, 100] + "input_cost_per_query": 0.025, + "max_results_range": [ + 26, + 100 + ] } ] }, @@ -8430,44 +9801,74 @@ "mode": "search", "tiered_pricing": [ { - "input_cost_per_query": 1.66e-3, - "max_results_range": [1, 10] + "input_cost_per_query": 0.00166, + "max_results_range": [ + 1, + 10 + ] }, { - "input_cost_per_query": 3.32e-3, - "max_results_range": [11, 20] + 
"input_cost_per_query": 0.00332, + "max_results_range": [ + 11, + 20 + ] }, { - "input_cost_per_query": 4.98e-3, - "max_results_range": [21, 30] + "input_cost_per_query": 0.00498, + "max_results_range": [ + 21, + 30 + ] }, { - "input_cost_per_query": 6.64e-3, - "max_results_range": [31, 40] + "input_cost_per_query": 0.00664, + "max_results_range": [ + 31, + 40 + ] }, { - "input_cost_per_query": 8.3e-3, - "max_results_range": [41, 50] + "input_cost_per_query": 0.0083, + "max_results_range": [ + 41, + 50 + ] }, { - "input_cost_per_query": 9.96e-3, - "max_results_range": [51, 60] + "input_cost_per_query": 0.00996, + "max_results_range": [ + 51, + 60 + ] }, { - "input_cost_per_query": 11.62e-3, - "max_results_range": [61, 70] + "input_cost_per_query": 0.01162, + "max_results_range": [ + 61, + 70 + ] }, { - "input_cost_per_query": 13.28e-3, - "max_results_range": [71, 80] + "input_cost_per_query": 0.01328, + "max_results_range": [ + 71, + 80 + ] }, { - "input_cost_per_query": 14.94e-3, - "max_results_range": [81, 90] + "input_cost_per_query": 0.01494, + "max_results_range": [ + 81, + 90 + ] }, { - "input_cost_per_query": 16.6e-3, - "max_results_range": [91, 100] + "input_cost_per_query": 0.0166, + "max_results_range": [ + 91, + 100 + ] } ], "metadata": { @@ -8475,20 +9876,20 @@ } }, "perplexity/search": { - "input_cost_per_query": 5e-3, + "input_cost_per_query": 0.005, "litellm_provider": "perplexity", "mode": "search" }, "searxng/search": { "litellm_provider": "searxng", "mode": "search", - "input_cost_per_query": 0.0, + "input_cost_per_query": 0, "metadata": { "notes": "SearXNG is an open-source metasearch engine. Free to use when self-hosted or using public instances." 
} }, "elevenlabs/scribe_v1": { - "input_cost_per_second": 6.11e-5, + "input_cost_per_second": 0.0000611, "litellm_provider": "elevenlabs", "metadata": { "calculation": "$0.22/hour = $0.00366/minute = $0.0000611 per second (enterprise pricing)", @@ -8496,12 +9897,14 @@ "original_pricing_per_hour": 0.22 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "elevenlabs/scribe_v1_experimental": { - "input_cost_per_second": 6.11e-5, + "input_cost_per_second": 0.0000611, "litellm_provider": "elevenlabs", "metadata": { "calculation": "$0.22/hour = $0.00366/minute = $0.0000611 per second (enterprise pricing)", @@ -8509,9 +9912,11 @@ "original_pricing_per_hour": 0.22 }, "mode": "audio_transcription", - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "embed-english-light-v2.0": { "input_cost_per_token": 1e-7, @@ -8519,7 +9924,7 @@ "max_input_tokens": 1024, "max_tokens": 1024, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "embed-english-light-v3.0": { "input_cost_per_token": 1e-7, @@ -8527,7 +9932,7 @@ "max_input_tokens": 1024, "max_tokens": 1024, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "embed-english-v2.0": { "input_cost_per_token": 1e-7, @@ -8535,7 +9940,7 @@ "max_input_tokens": 4096, "max_tokens": 4096, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "embed-english-v3.0": { "input_cost_per_image": 0.0001, @@ -8547,7 +9952,7 @@ "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." 
}, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_embedding_image_input": true, "supports_image_input": true }, @@ -8557,7 +9962,7 @@ "max_input_tokens": 768, "max_tokens": 768, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "embed-multilingual-v3.0": { "input_cost_per_token": 1e-7, @@ -8565,7 +9970,16 @@ "max_input_tokens": 1024, "max_tokens": 1024, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, + "supports_embedding_image_input": true + }, + "embed-multilingual-light-v3.0": { + "input_cost_per_token": 0.0001, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0, "supports_embedding_image_input": true }, "eu.amazon.nova-lite-v1:0": { @@ -8595,13 +10009,13 @@ "supports_response_schema": true }, "eu.amazon.nova-pro-v1:0": { - "input_cost_per_token": 1.05e-6, + "input_cost_per_token": 0.00000105, "litellm_provider": "bedrock_converse", "max_input_tokens": 300000, "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 4.2e-6, + "output_cost_per_token": 0.0000042, "source": "https://aws.amazon.com/bedrock/pricing/", "supports_function_calling": true, "supports_pdf_input": true, @@ -8616,7 +10030,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -8625,16 +10039,16 @@ "supports_tool_choice": true }, "eu.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "deprecation_date": "2026-10-15", "litellm_provider": "bedrock_converse", "max_input_tokens": 
200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -8648,13 +10062,13 @@ "tool_use_system_prompt_tokens": 346 }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -8662,13 +10076,13 @@ "supports_vision": true }, "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -8679,13 +10093,13 @@ "supports_vision": true }, "eu.anthropic.claude-3-7-sonnet-20250219-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -8703,7 +10117,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -8711,26 
+10125,26 @@ "supports_vision": true }, "eu.anthropic.claude-3-opus-20240229-v1:0": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true }, "eu.anthropic.claude-3-sonnet-20240229-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -8738,15 +10152,15 @@ "supports_vision": true }, "eu.anthropic.claude-opus-4-1-20250805-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -8764,15 +10178,15 @@ "tool_use_system_prompt_tokens": 159 }, "eu.anthropic.claude-opus-4-20250514-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, 
"max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -8790,19 +10204,19 @@ "tool_use_system_prompt_tokens": 159 }, "eu.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -8820,19 +10234,19 @@ "tool_use_system_prompt_tokens": 159 }, "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 4.125e-6, + "cache_creation_input_token_cost": 0.000004125, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 3.3e-6, - "input_cost_per_token_above_200k_tokens": 6.6e-6, - "output_cost_per_token_above_200k_tokens": 2.475e-5, - "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", 
"max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -8872,13 +10286,13 @@ "supports_tool_choice": false }, "eu.mistral.pixtral-large-2502-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": false }, @@ -8886,73 +10300,97 @@ "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/flux-pro/v1.1": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.04, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/flux-pro/v1.1-ultra": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.06, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/flux/schnell": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.003, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/bytedance/seedream/v3/text-to-image": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.03, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image": { "litellm_provider": "fal_ai", "mode": 
"image_generation", "output_cost_per_image": 0.03, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/ideogram/v3": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.06, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/imagen4/preview": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/imagen4/preview/fast": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.02, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/imagen4/preview/ultra": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.06, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/recraft/v3/text-to-image": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/stable-diffusion-v35-medium": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "featherless_ai/featherless-ai/Qwerky-72B": { "litellm_provider": "featherless_ai", @@ -8974,9 +10412,9 @@ "output_cost_per_token": 2e-7 }, "fireworks-ai-56b-to-176b": { - "input_cost_per_token": 1.2e-6, + "input_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", - "output_cost_per_token": 1.2e-6 + "output_cost_per_token": 0.0000012 }, "fireworks-ai-above-16b": { 
"input_cost_per_token": 9e-7, @@ -8984,19 +10422,19 @@ "output_cost_per_token": 9e-7 }, "fireworks-ai-default": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "fireworks_ai", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "fireworks-ai-embedding-150m-to-350m": { "input_cost_per_token": 1.6e-8, "litellm_provider": "fireworks_ai-embedding-models", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "fireworks-ai-embedding-up-to-150m": { "input_cost_per_token": 8e-9, "litellm_provider": "fireworks_ai-embedding-models", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "fireworks-ai-moe-up-to-56b": { "input_cost_per_token": 5e-7, @@ -9014,42 +10452,42 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { - "input_cost_per_token": 1.2e-6, + "input_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", "max_input_tokens": 65536, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://fireworks.ai/pricing", "supports_function_calling": false, "supports_response_schema": true, "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/deepseek-r1": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", "max_input_tokens": 128000, "max_output_tokens": 20480, "max_tokens": 20480, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/deepseek-r1-0528": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", 
"max_input_tokens": 160000, "max_output_tokens": 160000, "max_tokens": 160000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": false @@ -9061,7 +10499,7 @@ "max_output_tokens": 20480, "max_tokens": 20480, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": false @@ -9097,7 +10535,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.68e-6, + "output_cost_per_token": 0.00000168, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": true @@ -9109,11 +10547,24 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.68e-6, + "output_cost_per_token": 0.00000168, "source": "https://fireworks.ai/pricing", "supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3p2": { + "input_cost_per_token": 0.0000012, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "source": "https://fireworks.ai/models/fireworks/deepseek-v3p2", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/firefunction-v2": { "input_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai", @@ -9134,7 +10585,7 @@ "max_output_tokens": 96000, "max_tokens": 96000, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "source": "https://fireworks.ai/models/fireworks/glm-4p5", "supports_function_calling": true, "supports_response_schema": true, @@ -9153,6 +10604,19 @@ "supports_response_schema": 
true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/glm-4p6": { + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 0.00000219, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": { "input_cost_per_token": 1.5e-7, "litellm_provider": "fireworks_ai", @@ -9186,8 +10650,21 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-6, - "source": "https://fireworks.ai/models/fireworks/kimi-k2-instruct", + "output_cost_per_token": 0.0000025, + "source": "https://fireworks.ai/models/fireworks/kimi-k2-instruct", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905": { + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://app.fireworks.ai/models/fireworks/kimi-k2-instruct-0905", "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true @@ -9199,7 +10676,7 @@ "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "source": "https://fireworks.ai/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -9207,13 +10684,13 @@ "supports_web_search": true }, "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", "max_input_tokens": 128000, 
"max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://fireworks.ai/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -9310,13 +10787,13 @@ "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { - "input_cost_per_token": 1.2e-6, + "input_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", "max_input_tokens": 65536, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://fireworks.ai/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -9349,13 +10826,13 @@ "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/yi-large": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://fireworks.ai/pricing", "supports_function_calling": false, "supports_response_schema": true, @@ -9367,7 +10844,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { @@ -9376,7 +10853,7 @@ "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/thenlper/gte-base": { @@ -9385,7 +10862,7 @@ "max_input_tokens": 512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/thenlper/gte-large": { @@ -9394,7 +10871,7 @@ "max_input_tokens": 
512, "max_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "friendliai/meta-llama-3.1-70b-instruct": { @@ -9426,121 +10903,122 @@ "supports_tool_choice": true }, "ft:babbage-002": { - "input_cost_per_token": 4e-7, + "input_cost_per_token": 0.0000016, "input_cost_per_token_batches": 2e-7, "litellm_provider": "text-completion-openai", "max_input_tokens": 16384, "max_output_tokens": 4096, "max_tokens": 16384, "mode": "completion", - "output_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 2e-7 }, "ft:davinci-002": { - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000012, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "text-completion-openai", "max_input_tokens": 16384, "max_output_tokens": 4096, "max_tokens": 16384, "mode": "completion", - "output_cost_per_token": 2e-6, - "output_cost_per_token_batches": 1e-6 + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000001 }, "ft:gpt-3.5-turbo": { - "input_cost_per_token": 3e-6, - "input_cost_per_token_batches": 1.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_batches": 0.0000015, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, - "output_cost_per_token_batches": 3e-6, + "output_cost_per_token": 0.000006, + "output_cost_per_token_batches": 0.000003, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-3.5-turbo-0125": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_system_messages": true, "supports_tool_choice": true }, 
"ft:gpt-3.5-turbo-0613": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-3.5-turbo-1106": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-4-0613": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. 
Defaulting to base model pricing", "supports_function_calling": true, "supports_system_messages": true, "supports_tool_choice": true }, "ft:gpt-4o-2024-08-06": { - "input_cost_per_token": 3.75e-6, - "input_cost_per_token_batches": 1.875e-6, + "cache_read_input_token_cost": 0.000001875, + "input_cost_per_token": 0.00000375, + "input_cost_per_token_batches": 0.000001875, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_batches": 7.5e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_batches": 0.0000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, + "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true }, "ft:gpt-4o-2024-11-20": { - "cache_creation_input_token_cost": 1.875e-6, - "input_cost_per_token": 3.75e-6, + "cache_creation_input_token_cost": 0.000001875, + "input_cost_per_token": 0.00000375, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true + "supports_tool_choice": true }, "ft:gpt-4o-mini-2024-07-18": { "cache_read_input_token_cost": 1.5e-7, @@ -9551,7 +11029,7 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "output_cost_per_token_batches": 6e-7, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9559,8 +11037,79 @@ 
"supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true + "supports_tool_choice": true + }, + "ft:gpt-4.1-2025-04-14": { + "cache_read_input_token_cost": 7.5e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_batches": 0.0000015, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000006, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-mini-2025-04-14": { + "cache_read_input_token_cost": 2e-7, + "input_cost_per_token": 8e-7, + "input_cost_per_token_batches": 4e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0000032, + "output_cost_per_token_batches": 0.0000016, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-nano-2025-04-14": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_batches": 1e-7, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-7, + "output_cost_per_token_batches": 4e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:o4-mini-2025-04-16": { + 
"cache_read_input_token_cost": 0.000001, + "input_cost_per_token": 0.000004, + "input_cost_per_token_batches": 0.000002, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 0.000016, + "output_cost_per_token_batches": 0.000008, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true }, "gemini-1.0-pro": { "input_cost_per_character": 1.25e-7, @@ -9573,7 +11122,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9591,7 +11140,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9609,7 +11158,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9626,7 +11175,7 @@ "max_video_length": 2, "max_videos_per_prompt": 1, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9645,7 
+11194,7 @@ "max_video_length": 2, "max_videos_per_prompt": 1, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9663,7 +11212,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9680,23 +11229,23 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. 
Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "gemini-1.5-flash": { - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9722,16 +11271,16 @@ }, "gemini-1.5-flash-001": { "deprecation_date": "2025-05-24", - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "input_cost_per_token_above_128k_tokens": 0.000001, + 
"input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9757,16 +11306,16 @@ }, "gemini-1.5-flash-002": { "deprecation_date": "2025-09-24", - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9791,16 +11340,16 @@ "supports_vision": true }, "gemini-1.5-flash-exp-0827": { - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 4.688e-9, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + 
"input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9825,16 +11374,16 @@ "supports_vision": true }, "gemini-1.5-flash-preview-0514": { - "input_cost_per_audio_per_second": 2e-6, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-6, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, "input_cost_per_character_above_128k_tokens": 2.5e-7, - "input_cost_per_image": 2e-5, - "input_cost_per_image_above_128k_tokens": 4e-5, + "input_cost_per_image": 0.00002, + "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1e-6, - "input_cost_per_video_per_second": 2e-5, - "input_cost_per_video_per_second_above_128k_tokens": 4e-5, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -9858,14 +11407,14 @@ "supports_vision": true }, "gemini-1.5-pro": { - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, "input_cost_per_image_above_128k_tokens": 0.0006575, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_128k_tokens": 0.0000025, "input_cost_per_video_per_second": 0.00032875, 
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "litellm_provider": "vertex_ai-language-models", @@ -9873,10 +11422,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, - "output_cost_per_token": 5e-6, - "output_cost_per_token_above_128k_tokens": 1e-5, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "output_cost_per_token": 0.000005, + "output_cost_per_token_above_128k_tokens": 0.00001, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9888,14 +11437,14 @@ }, "gemini-1.5-pro-001": { "deprecation_date": "2025-05-24", - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, "input_cost_per_image_above_128k_tokens": 0.0006575, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_128k_tokens": 0.0000025, "input_cost_per_video_per_second": 0.00032875, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "litellm_provider": "vertex_ai-language-models", @@ -9903,10 +11452,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, - "output_cost_per_token": 5e-6, - "output_cost_per_token_above_128k_tokens": 1e-5, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "output_cost_per_token": 0.000005, + 
"output_cost_per_token_above_128k_tokens": 0.00001, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9917,14 +11466,14 @@ }, "gemini-1.5-pro-002": { "deprecation_date": "2025-09-24", - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, "input_cost_per_image_above_128k_tokens": 0.0006575, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_128k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_128k_tokens": 0.0000025, "input_cost_per_video_per_second": 0.00032875, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "litellm_provider": "vertex_ai-language-models", @@ -9932,10 +11481,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, - "output_cost_per_token": 5e-6, - "output_cost_per_token_above_128k_tokens": 1e-5, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, + "output_cost_per_token": 0.000005, + "output_cost_per_token_above_128k_tokens": 0.00001, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9945,8 +11494,8 @@ "supports_vision": true }, "gemini-1.5-pro-preview-0215": { - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 
0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, @@ -9960,8 +11509,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, "output_cost_per_token": 3.125e-7, "output_cost_per_token_above_128k_tokens": 6.25e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -9972,8 +11521,8 @@ "supports_tool_choice": true }, "gemini-1.5-pro-preview-0409": { - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, @@ -9987,8 +11536,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, "output_cost_per_token": 3.125e-7, "output_cost_per_token_above_128k_tokens": 6.25e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -9998,8 +11547,8 @@ "supports_tool_choice": true }, "gemini-1.5-pro-preview-0514": { - "input_cost_per_audio_per_second": 3.125e-5, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-5, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, "input_cost_per_character_above_128k_tokens": 6.25e-7, "input_cost_per_image": 0.00032875, @@ -10013,8 +11562,8 @@ 
"max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1.25e-6, - "output_cost_per_character_above_128k_tokens": 2.5e-6, + "output_cost_per_character": 0.00000125, + "output_cost_per_character_above_128k_tokens": 0.0000025, "output_cost_per_token": 3.125e-7, "output_cost_per_token_above_128k_tokens": 6.25e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -10041,8 +11590,16 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10058,7 +11615,7 @@ "gemini-2.0-flash-001": { "cache_read_input_token_cost": 3.75e-8, "deprecation_date": "2026-02-05", - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10073,8 +11630,16 @@ "mode": "chat", "output_cost_per_token": 6e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10113,8 +11678,16 @@ "output_cost_per_token": 6e-7, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": ["text", "image", "audio", "video"], - 
"supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10141,8 +11714,15 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10170,8 +11750,15 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10184,10 +11771,10 @@ }, "gemini-2.0-flash-live-preview-04-09": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 3e-6, - "input_cost_per_image": 3e-6, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_image": 0.000003, "input_cost_per_token": 5e-7, - "input_cost_per_video_per_second": 3e-6, + "input_cost_per_video_per_second": 0.000003, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10199,13 +11786,24 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_audio_token": 1.2e-5, - "output_cost_per_token": 2e-6, + "output_cost_per_audio_token": 
0.000012, + "output_cost_per_token": 0.000002, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini#gemini-2-0-flash-live-preview-04-09", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10236,8 +11834,16 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10250,7 +11856,7 @@ "supports_web_search": true }, "gemini-2.0-flash-thinking-exp": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -10277,8 +11883,16 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ 
-10290,7 +11904,7 @@ "supports_web_search": true }, "gemini-2.0-flash-thinking-exp-01-21": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -10317,8 +11931,16 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": false, "supports_parallel_function_calling": true, @@ -10332,8 +11954,8 @@ }, "gemini-2.0-pro-exp-02-05": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10345,12 +11967,22 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, 
"supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10365,7 +11997,7 @@ }, "gemini-2.5-flash": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10378,12 +12010,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10399,7 +12042,7 @@ }, "gemini-2.5-flash-image": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10413,13 +12056,25 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", 
"image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10435,7 +12090,7 @@ }, "gemini-2.5-flash-image-preview": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10449,13 +12104,25 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 3e-5, - "output_cost_per_token": 3e-5, + "output_cost_per_reasoning_token": 0.00003, + "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10471,20 +12138,31 @@ }, "gemini-3-pro-image-preview": { "input_cost_per_image": 0.0011, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 65536, "max_output_tokens": 32768, "max_tokens": 65536, "mode": 
"image_generation", "output_cost_per_image": 0.134, - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000006, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": false, "supports_prompt_caching": true, "supports_response_schema": true, @@ -10510,9 +12188,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10544,9 +12233,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + 
"/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10562,7 +12262,7 @@ }, "gemini-2.5-flash-preview-09-2025": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10575,12 +12275,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10596,7 +12307,7 @@ }, "gemini-live-2.5-flash-preview-native-audio-09-2025": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 3e-6, + "input_cost_per_audio_token": 0.000003, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10609,12 +12320,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_audio_token": 1.2e-5, - "output_cost_per_token": 2e-6, + 
"output_cost_per_audio_token": 0.000012, + "output_cost_per_token": 0.000002, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10630,7 +12352,7 @@ }, "gemini/gemini-live-2.5-flash-preview-native-audio-09-2025": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 3e-6, + "input_cost_per_audio_token": 0.000003, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -10643,13 +12365,24 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_audio_token": 1.2e-5, - "output_cost_per_token": 2e-6, + "output_cost_per_audio_token": 0.000012, + "output_cost_per_token": 0.000002, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10682,9 +12415,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", 
"/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10700,7 +12444,7 @@ }, "gemini-2.5-flash-preview-04-17": { "cache_read_input_token_cost": 3.75e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10713,12 +12457,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10733,7 +12488,7 @@ }, "gemini-2.5-flash-preview-05-20": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -10746,12 +12501,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - 
"output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10768,8 +12534,8 @@ "gemini-2.5-pro": { "cache_read_input_token_cost": 1.25e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10781,12 +12547,22 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10803,9 +12579,9 @@ 
"cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_above_200k_tokens": 4e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10817,13 +12593,24 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_above_200k_tokens": 1.8e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10840,9 +12627,9 @@ "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_above_200k_tokens": 4e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "vertex_ai", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10854,13 +12641,24 @@ 
"max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_above_200k_tokens": 1.8e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10875,8 +12673,8 @@ }, "gemini-2.5-pro-exp-03-25": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10888,12 +12686,22 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + 
"text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10908,9 +12716,9 @@ }, "gemini-2.5-pro-preview-03-25": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 1.25e-6, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10922,12 +12730,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10942,9 +12761,9 @@ }, "gemini-2.5-pro-preview-05-06": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 1.25e-6, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10956,13 +12775,26 @@ "max_video_length": 1, 
"max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], - "supported_regions": ["global"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supported_regions": [ + "global" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10977,9 +12809,9 @@ }, "gemini-2.5-pro-preview-06-05": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 1.25e-6, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10991,12 +12823,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + 
"/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11012,8 +12855,8 @@ "gemini-2.5-pro-preview-tts": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -11025,11 +12868,15 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11076,7 +12923,7 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11108,7 +12955,7 @@ "max_video_length": 2, "max_videos_per_prompt": 1, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11153,7 +13000,7 @@ "tpm": 
4000000 }, "gemini/gemini-1.5-flash-001": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 1.875e-8, "deprecation_date": "2025-05-24", "input_cost_per_token": 7.5e-8, @@ -11182,7 +13029,7 @@ "tpm": 4000000 }, "gemini/gemini-1.5-flash-002": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 1.875e-8, "deprecation_date": "2025-09-24", "input_cost_per_token": 7.5e-8, @@ -11339,15 +13186,15 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro": { - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-5, - "output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11359,15 +13206,15 @@ }, "gemini/gemini-1.5-pro-001": { "deprecation_date": "2025-05-24", - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-5, - "output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11380,15 +13227,15 @@ }, "gemini/gemini-1.5-pro-002": { "deprecation_date": "2025-09-24", - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + 
"input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-5, - "output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11400,15 +13247,15 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro-exp-0801": { - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-5, - "output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11438,15 +13285,15 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro-latest": { - "input_cost_per_token": 3.5e-6, - "input_cost_per_token_above_128k_tokens": 7e-6, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 1048576, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-6, - "output_cost_per_token_above_128k_tokens": 2.1e-5, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "rpm": 1000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, @@ -11474,8 +13321,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - 
"supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -11506,8 +13361,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11519,7 +13382,7 @@ "tpm": 10000000 }, "gemini/gemini-2.0-flash-exp": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -11547,8 +13410,16 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11576,8 +13447,15 @@ "output_cost_per_token": 3e-7, "rpm": 4000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": 
true, "supports_prompt_caching": true, @@ -11606,8 +13484,15 @@ "output_cost_per_token": 3e-7, "rpm": 60000, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11620,10 +13505,10 @@ }, "gemini/gemini-2.0-flash-live-001": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 2.1e-6, - "input_cost_per_image": 2.1e-6, + "input_cost_per_audio_token": 0.0000021, + "input_cost_per_image": 0.0000021, "input_cost_per_token": 3.5e-7, - "input_cost_per_video_per_second": 2.1e-6, + "input_cost_per_video_per_second": 0.0000021, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -11635,13 +13520,24 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_audio_token": 8.5e-6, - "output_cost_per_token": 1.5e-6, + "output_cost_per_audio_token": 0.0000085, + "output_cost_per_token": 0.0000015, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2-0-flash-live-001", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -11673,8 +13569,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - 
"supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -11687,7 +13591,7 @@ "tpm": 10000000 }, "gemini/gemini-2.0-flash-thinking-exp": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -11715,8 +13619,16 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11728,7 +13640,7 @@ "tpm": 4000000 }, "gemini/gemini-2.0-flash-thinking-exp-01-21": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -11756,8 +13668,16 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11770,7 +13690,7 @@ 
"tpm": 4000000 }, "gemini/gemini-2.0-pro-exp-02-05": { - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, "input_cost_per_character": 0, @@ -11812,7 +13732,7 @@ }, "gemini/gemini-2.5-flash": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -11825,13 +13745,24 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11848,7 +13779,7 @@ }, "gemini/gemini-2.5-flash-image": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -11863,13 +13794,25 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 100000, "source": 
"https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11885,7 +13828,7 @@ }, "gemini/gemini-2.5-flash-image-preview": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -11899,13 +13842,25 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 3e-5, - "output_cost_per_token": 3e-5, + "output_cost_per_reasoning_token": 0.00003, + "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11921,22 +13876,33 @@ }, "gemini/gemini-3-pro-image-preview": { "input_cost_per_image": 0.0011, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 
0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "gemini", "max_input_tokens": 65536, "max_output_tokens": 32768, "max_tokens": 65536, "mode": "image_generation", "output_cost_per_image": 0.134, - "output_cost_per_token": 1.2e-5, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, "rpm": 1000, "tpm": 4000000, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token_batches": 0.000006, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": false, "supports_prompt_caching": true, "supports_response_schema": true, @@ -11963,9 +13929,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11999,9 +13976,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", 
"image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12018,7 +14006,7 @@ }, "gemini/gemini-2.5-flash-preview-09-2025": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12031,13 +14019,24 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12054,7 +14053,7 @@ }, "gemini/gemini-flash-latest": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12067,13 +14066,24 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - 
"output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12107,9 +14117,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12143,9 +14164,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" 
+ ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12162,7 +14194,7 @@ }, "gemini/gemini-2.5-flash-preview-04-17": { "cache_read_input_token_cost": 3.75e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12175,13 +14207,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12196,7 +14238,7 @@ }, "gemini/gemini-2.5-flash-preview-05-20": { "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12209,13 +14251,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", 
"/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12231,7 +14283,7 @@ }, "gemini/gemini-2.5-flash-preview-tts": { "cache_read_input_token_cost": 3.75e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -12244,13 +14296,20 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 3.5e-6, + "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -12264,8 +14323,8 @@ }, "gemini/gemini-2.5-pro": { "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12277,13 +14336,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + 
"output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -12300,9 +14369,9 @@ "gemini/gemini-3-pro-preview": { "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_above_200k_tokens": 4e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12314,14 +14383,25 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_above_200k_tokens": 1.8e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], 
"supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -12336,9 +14416,9 @@ "tpm": 800000 }, "gemini/gemini-2.5-pro-exp-03-25": { - "cache_read_input_token_cost": 0.0, - "input_cost_per_token": 0.0, - "input_cost_per_token_above_200k_tokens": 0.0, + "cache_read_input_token_cost": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_200k_tokens": 0, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12350,13 +14430,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.0, - "output_cost_per_token_above_200k_tokens": 0.0, + "output_cost_per_token": 0, + "output_cost_per_token_above_200k_tokens": 0, "rpm": 5, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -12372,8 +14462,8 @@ "gemini/gemini-2.5-pro-preview-03-25": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12385,12 +14475,19 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": 
"https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12405,8 +14502,8 @@ "gemini/gemini-2.5-pro-preview-05-06": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12418,12 +14515,19 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12439,8 +14543,8 @@ "gemini/gemini-2.5-pro-preview-06-05": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12452,12 +14556,19 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": 
"chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12473,8 +14584,8 @@ "gemini/gemini-2.5-pro-preview-tts": { "cache_read_input_token_cost": 3.125e-7, "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_above_200k_tokens": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -12486,12 +14597,16 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_above_200k_tokens": 1.5e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -12566,7 +14681,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-6, + "output_cost_per_token": 0.00000105, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_tool_choice": true, @@ -12578,7 +14693,7 
@@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-6, + "output_cost_per_token": 0.00000105, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_tool_choice": true, @@ -12592,8 +14707,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.05e-6, - "output_cost_per_token_above_128k_tokens": 2.1e-6, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.0000021, "rpd": 30000, "rpm": 360, "source": "https://ai.google.dev/gemini-api/docs/models/gemini", @@ -12609,8 +14724,8 @@ "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 1.05e-6, - "output_cost_per_token_above_128k_tokens": 2.1e-6, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.0000021, "rpd": 30000, "rpm": 360, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -12718,8 +14833,12 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "gemini/veo-3.0-fast-generate-preview": { "litellm_provider": "gemini", @@ -12728,8 +14847,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "gemini/veo-3.0-generate-preview": { "litellm_provider": "gemini", @@ -12738,8 +14861,12 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": 
["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "gemini/veo-3.1-fast-generate-preview": { "litellm_provider": "gemini", @@ -12748,8 +14875,12 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "gemini/veo-3.1-generate-preview": { "litellm_provider": "gemini", @@ -12758,8 +14889,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "google_pse/search": { "input_cost_per_query": 0.005, @@ -12767,19 +14902,19 @@ "mode": "search" }, "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -12797,19 +14932,19 @@ 
"tool_use_system_prompt_tokens": 346 }, "global.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -12827,15 +14962,15 @@ "tool_use_system_prompt_tokens": 159 }, "global.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -12849,13 +14984,13 @@ "tool_use_system_prompt_tokens": 346 }, "gpt-3.5-turbo": { - "input_cost_per_token": 0.5e-6, + "input_cost_per_token": 5e-7, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - 
"output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -12868,7 +15003,7 @@ "max_output_tokens": 4096, "max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -12876,25 +15011,25 @@ "supports_tool_choice": true }, "gpt-3.5-turbo-0301": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openai", "max_input_tokens": 4097, "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_prompt_caching": true, "supports_system_messages": true, "supports_tool_choice": true }, "gpt-3.5-turbo-0613": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openai", "max_input_tokens": 4097, "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -12902,13 +15037,13 @@ }, "gpt-3.5-turbo-1106": { "deprecation_date": "2026-09-28", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -12916,55 +15051,55 @@ "supports_tool_choice": true }, "gpt-3.5-turbo-16k": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 16385, "mode": 
"chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_prompt_caching": true, "supports_system_messages": true, "supports_tool_choice": true }, "gpt-3.5-turbo-16k-0613": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, "max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_prompt_caching": true, "supports_system_messages": true, "supports_tool_choice": true }, "gpt-3.5-turbo-instruct": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "text-completion-openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "gpt-3.5-turbo-instruct-0914": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "text-completion-openai", "max_input_tokens": 8192, "max_output_tokens": 4097, "max_tokens": 4097, "mode": "completion", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "gpt-4": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -12972,13 +15107,13 @@ }, "gpt-4-0125-preview": { "deprecation_date": "2026-03-26", - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, 
"supports_prompt_caching": true, @@ -12986,26 +15121,26 @@ "supports_tool_choice": true }, "gpt-4-0314": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_prompt_caching": true, "supports_system_messages": true, "supports_tool_choice": true }, "gpt-4-0613": { "deprecation_date": "2025-06-06", - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openai", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -13013,13 +15148,13 @@ }, "gpt-4-1106-preview": { "deprecation_date": "2026-03-26", - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -13028,13 +15163,13 @@ }, "gpt-4-1106-vision-preview": { "deprecation_date": "2024-12-06", - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -13042,7 +15177,7 @@ "supports_vision": true }, "gpt-4-32k": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -13054,7 
+15189,7 @@ "supports_tool_choice": true }, "gpt-4-32k-0314": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -13066,7 +15201,7 @@ "supports_tool_choice": true }, "gpt-4-32k-0613": { - "input_cost_per_token": 6e-5, + "input_cost_per_token": 0.00006, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 4096, @@ -13078,13 +15213,13 @@ "supports_tool_choice": true }, "gpt-4-turbo": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13094,13 +15229,13 @@ "supports_vision": true }, "gpt-4-turbo-2024-04-09": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13110,13 +15245,13 @@ "supports_vision": true }, "gpt-4-turbo-preview": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13126,13 +15261,13 @@ }, "gpt-4-vision-preview": { "deprecation_date": "2024-12-06", - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, 
"max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_system_messages": true, @@ -13142,20 +15277,29 @@ "gpt-4.1": { "cache_read_input_token_cost": 5e-7, "cache_read_input_token_cost_priority": 8.75e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, - "input_cost_per_token_priority": 3.5e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, + "input_cost_per_token_priority": 0.0000035, "litellm_provider": "openai", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, - "output_cost_per_token_priority": 1.4e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, + "output_cost_per_token_priority": 0.000014, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13169,18 +15313,27 @@ }, "gpt-4.1-2025-04-14": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "openai", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", 
"image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13203,12 +15356,21 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "output_cost_per_token_priority": 2.8e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token_priority": 0.0000028, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13229,11 +15391,20 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13259,9 +15430,18 @@ "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, 
"output_cost_per_token_priority": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13284,9 +15464,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13299,16 +15488,16 @@ "supports_vision": true }, "gpt-4.5-preview": { - "cache_read_input_token_cost": 3.75e-5, - "input_cost_per_token": 7.5e-5, - "input_cost_per_token_batches": 3.75e-5, + "cache_read_input_token_cost": 0.0000375, + "input_cost_per_token": 0.000075, + "input_cost_per_token_batches": 0.0000375, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00015, - "output_cost_per_token_batches": 7.5e-5, + "output_cost_per_token_batches": 0.000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13319,17 +15508,17 @@ "supports_vision": true }, "gpt-4.5-preview-2025-02-27": { - "cache_read_input_token_cost": 3.75e-5, + "cache_read_input_token_cost": 0.0000375, "deprecation_date": "2025-07-14", - 
"input_cost_per_token": 7.5e-5, - "input_cost_per_token_batches": 3.75e-5, + "input_cost_per_token": 0.000075, + "input_cost_per_token_batches": 0.0000375, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00015, - "output_cost_per_token_batches": 7.5e-5, + "output_cost_per_token_batches": 0.000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13340,19 +15529,19 @@ "supports_vision": true }, "gpt-4o": { - "cache_read_input_token_cost": 1.25e-6, - "cache_read_input_token_cost_priority": 2.125e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, - "input_cost_per_token_priority": 4.25e-6, + "cache_read_input_token_cost": 0.00000125, + "cache_read_input_token_cost_priority": 0.000002125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, + "input_cost_per_token_priority": 0.00000425, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, - "output_cost_per_token_priority": 1.7e-5, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, + "output_cost_per_token_priority": 0.000017, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13364,17 +15553,17 @@ "supports_vision": true }, "gpt-4o-2024-05-13": { - "input_cost_per_token": 5e-6, - "input_cost_per_token_batches": 2.5e-6, - "input_cost_per_token_priority": 8.75e-6, + "input_cost_per_token": 0.000005, + "input_cost_per_token_batches": 0.0000025, + "input_cost_per_token_priority": 0.00000875, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, - 
"output_cost_per_token_batches": 7.5e-6, - "output_cost_per_token_priority": 2.625e-5, + "output_cost_per_token": 0.000015, + "output_cost_per_token_batches": 0.0000075, + "output_cost_per_token_priority": 0.00002625, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13384,16 +15573,16 @@ "supports_vision": true }, "gpt-4o-2024-08-06": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13405,16 +15594,16 @@ "supports_vision": true }, "gpt-4o-2024-11-20": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13427,14 +15616,14 @@ }, "gpt-4o-audio-preview": { "input_cost_per_audio_token": 0.0001, - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, 
"max_tokens": 16384, "mode": "chat", "output_cost_per_audio_token": 0.0002, - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13444,14 +15633,14 @@ }, "gpt-4o-audio-preview-2024-10-01": { "input_cost_per_audio_token": 0.0001, - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", "output_cost_per_audio_token": 0.0002, - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13460,15 +15649,15 @@ "supports_tool_choice": true }, "gpt-4o-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13477,15 +15666,15 @@ "supports_tool_choice": true }, "gpt-4o-audio-preview-2025-06-03": { - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 1e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13506,7 +15695,7 @@ 
"mode": "chat", "output_cost_per_token": 6e-7, "output_cost_per_token_batches": 3e-7, - "output_cost_per_token_priority": 1e-6, + "output_cost_per_token_priority": 0.000001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13544,14 +15733,14 @@ "supports_vision": true }, "gpt-4o-mini-audio-preview": { - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 1.5e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 2e-5, + "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 6e-7, "supports_audio_input": true, "supports_audio_output": true, @@ -13561,14 +15750,14 @@ "supports_tool_choice": true }, "gpt-4o-mini-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 1.5e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 2e-5, + "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 6e-7, "supports_audio_input": true, "supports_audio_output": true, @@ -13580,15 +15769,15 @@ "gpt-4o-mini-realtime-preview": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 3e-7, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13599,15 +15788,15 @@ "gpt-4o-mini-realtime-preview-2024-12-17": { 
"cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 3e-7, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13662,37 +15851,46 @@ "supports_vision": true }, "gpt-4o-mini-transcribe": { - "input_cost_per_audio_token": 3e-6, - "input_cost_per_token": 1.25e-6, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_token": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 5e-6, - "supported_endpoints": ["/v1/audio/transcriptions"] + "output_cost_per_token": 0.000005, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "gpt-4o-mini-tts": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "mode": "audio_speech", - "output_cost_per_audio_token": 1.2e-5, + "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/audio/speech"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["audio"] + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] }, "gpt-4o-realtime-preview": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.000005, 
"litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13701,17 +15899,17 @@ "supports_tool_choice": true }, "gpt-4o-realtime-preview-2024-10-01": { - "cache_creation_input_audio_token_cost": 2e-5, - "cache_read_input_token_cost": 2.5e-6, + "cache_creation_input_audio_token_cost": 0.00002, + "cache_read_input_token_cost": 0.0000025, "input_cost_per_audio_token": 0.0001, - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", "output_cost_per_audio_token": 0.0002, - "output_cost_per_token": 2e-5, + "output_cost_per_token": 0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13720,16 +15918,16 @@ "supports_tool_choice": true }, "gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13738,16 +15936,16 @@ "supports_tool_choice": true }, "gpt-4o-realtime-preview-2025-06-03": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_audio_token": 4e-5, - "input_cost_per_token": 5e-6, + 
"cache_read_input_token_cost": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.000005, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2e-5, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00002, "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13756,16 +15954,16 @@ "supports_tool_choice": true }, "gpt-4o-search-preview": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, "search_context_cost_per_query": { "search_context_size_high": 0.05, "search_context_size_low": 0.03, @@ -13782,16 +15980,16 @@ "supports_web_search": true }, "gpt-4o-search-preview-2025-03-11": { - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, - "input_cost_per_token_batches": 1.25e-6, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_batches": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_batches": 5e-6, + "output_cost_per_token": 0.00001, + "output_cost_per_token_batches": 0.000005, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -13802,33 +16000,44 @@ "supports_vision": true }, "gpt-4o-transcribe": 
{ - "input_cost_per_audio_token": 6e-6, - "input_cost_per_token": 2.5e-6, + "input_cost_per_audio_token": 0.000006, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 16000, "max_output_tokens": 2000, "mode": "audio_transcription", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/audio/transcriptions"] + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "gpt-5": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_flex": 6.25e-8, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "input_cost_per_token_flex": 6.25e-7, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_flex": 5e-6, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_flex": 0.000005, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13844,18 +16053,27 @@ "gpt-5.1": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, 
"max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13871,18 +16089,27 @@ "gpt-5.1-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13898,18 +16125,27 @@ "gpt-5.1-chat-latest": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + 
"input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "image"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -13922,18 +16158,26 @@ "supports_vision": true }, "gpt-5-pro": { - "input_cost_per_token": 1.5e-5, - "input_cost_per_token_batches": 7.5e-6, + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, "litellm_provider": "openai", "max_input_tokens": 400000, "max_output_tokens": 272000, "max_tokens": 272000, "mode": "responses", - "output_cost_per_token": 1.2e-4, - "output_cost_per_token_batches": 6e-5, - "supported_endpoints": ["/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 0.00006, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -13947,18 +16191,26 @@ "supports_web_search": true }, "gpt-5-pro-2025-10-06": { - "input_cost_per_token": 1.5e-5, - "input_cost_per_token_batches": 7.5e-6, + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, "litellm_provider": 
"openai", "max_input_tokens": 400000, "max_output_tokens": 272000, "max_tokens": 272000, "mode": "responses", - "output_cost_per_token": 1.2e-4, - "output_cost_per_token_batches": 6e-5, - "supported_endpoints": ["/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 0.00006, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -13975,20 +16227,29 @@ "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_flex": 6.25e-8, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "input_cost_per_token_flex": 6.25e-7, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "output_cost_per_token_flex": 5e-6, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_flex": 0.000005, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14003,16 +16264,25 @@ }, "gpt-5-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 
1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -14026,16 +16296,25 @@ }, "gpt-5-chat-latest": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -14049,16 +16328,23 @@ }, "gpt-5-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": 
["text"], + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14073,18 +16359,55 @@ "gpt-5.1-codex": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_priority": 2.5e-7, - "input_cost_per_token": 1.25e-6, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 1e-5, - "output_cost_per_token_priority": 2e-5, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, 
"supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14106,11 +16429,18 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 2e-6, - "output_cost_per_token_priority": 3.6e-6, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "output_cost_per_token_priority": 0.0000036, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14134,12 +16464,21 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, - "output_cost_per_token_flex": 1e-6, - "output_cost_per_token_priority": 3.6e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "output_cost_per_token_flex": 0.000001, + "output_cost_per_token_priority": 0.0000036, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14164,12 +16503,21 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, - "output_cost_per_token_flex": 1e-6, - "output_cost_per_token_priority": 3.6e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "output_cost_per_token_flex": 
0.000001, + "output_cost_per_token_priority": 0.0000036, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14187,7 +16535,7 @@ "cache_read_input_token_cost_flex": 2.5e-9, "input_cost_per_token": 5e-8, "input_cost_per_token_flex": 2.5e-8, - "input_cost_per_token_priority": 2.5e-6, + "input_cost_per_token_priority": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 272000, "max_output_tokens": 128000, @@ -14195,9 +16543,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14221,9 +16578,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14239,35 +16605,49 @@ "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - 
"supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "gpt-image-1-mini": { "cache_read_input_image_token_cost": 2.5e-7, "cache_read_input_token_cost": 2e-7, - "input_cost_per_image_token": 2.5e-6, - "input_cost_per_token": 2e-6, + "input_cost_per_image_token": 0.0000025, + "input_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", - "output_cost_per_image_token": 8e-6, - "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + "output_cost_per_image_token": 0.000008, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 4e-7, "cache_read_input_token_cost": 4e-7, - "input_cost_per_audio_token": 3.2e-5, - "input_cost_per_image": 5e-6, - "input_cost_per_token": 4e-6, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, "litellm_provider": "openai", "max_input_tokens": 32000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 6.4e-5, - "output_cost_per_token": 1.6e-5, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.000016, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -14278,18 +16658,27 @@ "gpt-realtime-mini": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_audio_token_cost": 3e-7, - "input_cost_per_audio_token": 1e-5, + "input_cost_per_audio_token": 0.00001, "input_cost_per_token": 6e-7, "litellm_provider": "openai", "max_input_tokens": 128000, 
"max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 2e-5, - "output_cost_per_token": 2.4e-6, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -14300,19 +16689,28 @@ "gpt-realtime-2025-08-28": { "cache_creation_input_audio_token_cost": 4e-7, "cache_read_input_token_cost": 4e-7, - "input_cost_per_audio_token": 3.2e-5, - "input_cost_per_image": 5e-6, - "input_cost_per_token": 4e-6, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, "litellm_provider": "openai", "max_input_tokens": 32000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 6.4e-5, - "output_cost_per_token": 1.6e-5, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.000016, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -14324,18 +16722,26 @@ "litellm_provider": "gradient_ai", "max_tokens": 2048, "mode": "chat", - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false 
}, "gradient_ai/anthropic-claude-3-opus": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "gradient_ai", "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 7.5e-5, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "output_cost_per_token": 0.000075, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-haiku": { @@ -14343,29 +16749,41 @@ "litellm_provider": "gradient_ai", "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 4e-6, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "output_cost_per_token": 0.000004, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "gradient_ai", "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "output_cost_per_token": 0.000015, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.7-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "gradient_ai", "max_tokens": 1024, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "output_cost_per_token": 0.000015, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/deepseek-r1-distill-llama-70b": { @@ -14374,8 +16792,12 @@ "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 9.9e-7, - 
"supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/llama3-8b-instruct": { @@ -14384,8 +16806,12 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 2e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/llama3.3-70b-instruct": { @@ -14394,8 +16820,12 @@ "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 6.5e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/mistral-nemo-instruct-2407": { @@ -14404,44 +16834,64 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 3e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o-mini": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-o3": { - "input_cost_per_token": 2e-6, + 
"input_cost_per_token": 0.000002, "litellm_provider": "gradient_ai", "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "output_cost_per_token": 0.000008, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-o3-mini": { - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "gradient_ai", "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "output_cost_per_token": 0.0000044, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { @@ -14514,14 +16964,14 @@ "output_cost_per_token": 9.9e-7, "supports_function_calling": true, "supports_reasoning": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/distil-whisper-large-v3-en": { - "input_cost_per_second": 5.56e-6, + "input_cost_per_second": 0.00000556, "litellm_provider": "groq", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "groq/gemma-7b-it": { "deprecation_date": "2024-12-18", @@ -14533,7 +16983,7 @@ "mode": "chat", "output_cost_per_token": 7e-8, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/gemma2-9b-it": { @@ -14545,7 +16995,7 @@ "mode": "chat", "output_cost_per_token": 2e-7, "supports_function_calling": false, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": false }, "groq/llama-3.1-405b-reasoning": { @@ -14557,7 +17007,7 @@ "mode": "chat", "output_cost_per_token": 7.9e-7, 
"supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.1-70b-versatile": { @@ -14570,7 +17020,7 @@ "mode": "chat", "output_cost_per_token": 7.9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.1-8b-instant": { @@ -14582,7 +17032,7 @@ "mode": "chat", "output_cost_per_token": 8e-8, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-11b-text-preview": { @@ -14595,7 +17045,7 @@ "mode": "chat", "output_cost_per_token": 1.8e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-11b-vision-preview": { @@ -14608,7 +17058,7 @@ "mode": "chat", "output_cost_per_token": 1.8e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true, "supports_vision": true }, @@ -14622,7 +17072,7 @@ "mode": "chat", "output_cost_per_token": 4e-8, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-3b-preview": { @@ -14635,7 +17085,7 @@ "mode": "chat", "output_cost_per_token": 6e-8, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-90b-text-preview": { @@ -14648,7 +17098,7 @@ "mode": "chat", "output_cost_per_token": 9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-3.2-90b-vision-preview": { @@ -14661,7 +17111,7 @@ "mode": "chat", "output_cost_per_token": 9e-7, "supports_function_calling": 
true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true, "supports_vision": true }, @@ -14685,7 +17135,7 @@ "mode": "chat", "output_cost_per_token": 7.9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama-guard-3-8b": { @@ -14706,7 +17156,7 @@ "mode": "chat", "output_cost_per_token": 8e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama3-groq-70b-8192-tool-use-preview": { @@ -14719,7 +17169,7 @@ "mode": "chat", "output_cost_per_token": 8.9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/llama3-groq-8b-8192-tool-use-preview": { @@ -14732,7 +17182,7 @@ "mode": "chat", "output_cost_per_token": 1.9e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/meta-llama/llama-4-maverick-17b-128e-instruct": { @@ -14778,25 +17228,25 @@ "mode": "chat", "output_cost_per_token": 2.4e-7, "supports_function_calling": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/moonshotai/kimi-k2-instruct": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "groq", "max_input_tokens": 131072, "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "groq/moonshotai/kimi-k2-instruct-0905": { - "input_cost_per_token": 1e-6, - "output_cost_per_token": 3e-6, - "cache_read_input_token_cost": 0.5e-6, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, 
+ "cache_read_input_token_cost": 5e-7, "litellm_provider": "groq", "max_input_tokens": 262144, "max_output_tokens": 16384, @@ -14837,7 +17287,7 @@ "supports_web_search": true }, "groq/playai-tts": { - "input_cost_per_character": 5e-5, + "input_cost_per_character": 0.00005, "litellm_provider": "groq", "max_input_tokens": 10000, "max_output_tokens": 10000, @@ -14854,38 +17304,38 @@ "output_cost_per_token": 5.9e-7, "supports_function_calling": true, "supports_reasoning": true, - "supports_response_schema": true, + "supports_response_schema": false, "supports_tool_choice": true }, "groq/whisper-large-v3": { - "input_cost_per_second": 3.083e-5, + "input_cost_per_second": 0.00003083, "litellm_provider": "groq", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "groq/whisper-large-v3-turbo": { - "input_cost_per_second": 1.111e-5, + "input_cost_per_second": 0.00001111, "litellm_provider": "groq", "mode": "audio_transcription", - "output_cost_per_second": 0.0 + "output_cost_per_second": 0 }, "hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "hd/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 6.539e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "hd/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 6.539e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "heroku/claude-3-5-haiku": { "litellm_provider": "heroku", @@ -14923,22 +17373,28 @@ "input_cost_per_pixel": 1.59263611e-7, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "high/1024-x-1536/gpt-image-1": { 
"input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { "input_cost_per_token": 1.2e-7, @@ -14993,13 +17449,13 @@ "supports_tool_choice": true }, "hyperbolic/Qwen/Qwen3-235B-A22B": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "hyperbolic", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, @@ -15136,44 +17592,44 @@ "supports_tool_choice": true }, "hyperbolic/moonshotai/Kimi-K2-Instruct": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "hyperbolic", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, "supports_tool_choice": true }, "j2-light": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "ai21", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 3e-6 + "output_cost_per_token": 0.000003 }, "j2-mid": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 
0.00001, "litellm_provider": "ai21", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "j2-ultra": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "ai21", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "jamba-1.5": { "input_cost_per_token": 2e-7, @@ -15186,23 +17642,23 @@ "supports_tool_choice": true }, "jamba-1.5-large": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "ai21", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "jamba-1.5-large@001": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "ai21", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "jamba-1.5-mini": { @@ -15226,23 +17682,23 @@ "supports_tool_choice": true }, "jamba-large-1.6": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "ai21", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "jamba-large-1.7": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "ai21", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "jamba-mini-1.6": { @@ -15276,19 +17732,19 @@ "output_cost_per_token": 
1.8e-8 }, "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 4.125e-6, + "cache_creation_input_token_cost": 0.000004125, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 3.3e-6, - "input_cost_per_token_above_200k_tokens": 6.6e-6, - "output_cost_per_token_above_200k_tokens": 2.475e-5, - "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -15306,15 +17762,15 @@ "tool_use_system_prompt_tokens": 346 }, "jp.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -15596,50 +18052,56 @@ "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, 
"low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "luminous-base": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "aleph_alpha", "max_tokens": 2048, "mode": "completion", - "output_cost_per_token": 3.3e-5 + "output_cost_per_token": 0.000033 }, "luminous-base-control": { - "input_cost_per_token": 3.75e-5, + "input_cost_per_token": 0.0000375, "litellm_provider": "aleph_alpha", "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 4.125e-5 + "output_cost_per_token": 0.00004125 }, "luminous-extended": { - "input_cost_per_token": 4.5e-5, + "input_cost_per_token": 0.000045, "litellm_provider": "aleph_alpha", "max_tokens": 2048, "mode": "completion", - "output_cost_per_token": 4.95e-5 + "output_cost_per_token": 0.0000495 }, "luminous-extended-control": { - "input_cost_per_token": 5.625e-5, + "input_cost_per_token": 0.00005625, "litellm_provider": "aleph_alpha", "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 6.1875e-5 + "output_cost_per_token": 0.000061875 }, "luminous-supreme": { "input_cost_per_token": 0.000175, @@ -15673,67 +18135,85 @@ "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1024-x-1536/gpt-image-1": { 
"input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0, - "supported_endpoints": ["/v1/images/generations"] + "output_cost_per_pixel": 0, + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.005, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.011, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + 
"/v1/images/generations" + ] }, "medlm-large": { - "input_cost_per_character": 5e-6, + "input_cost_per_character": 0.000005, "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "chat", - "output_cost_per_character": 1.5e-5, + "output_cost_per_character": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_tool_choice": true }, @@ -15744,7 +18224,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_character": 1e-6, + "output_cost_per_character": 0.000001, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_tool_choice": true }, @@ -15755,25 +18235,25 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "meta.llama2-70b-chat-v1": { - "input_cost_per_token": 1.95e-6, + "input_cost_per_token": 0.00000195, "litellm_provider": "bedrock", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.56e-6 + "output_cost_per_token": 0.00000256 }, "meta.llama3-1-405b-instruct-v1:0": { - "input_cost_per_token": 5.32e-6, + "input_cost_per_token": 0.00000532, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.6e-5, + "output_cost_per_token": 0.000016, "supports_function_calling": true, "supports_tool_choice": false }, @@ -15834,13 +18314,13 @@ "supports_tool_choice": false }, "meta.llama3-2-90b-instruct-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, 
"supports_tool_choice": false, "supports_vision": true @@ -15857,13 +18337,13 @@ "supports_tool_choice": false }, "meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 2.65e-6, + "input_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3.5e-6 + "output_cost_per_token": 0.0000035 }, "meta.llama3-8b-instruct-v1:0": { "input_cost_per_token": 3e-7, @@ -15884,8 +18364,14 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -15899,8 +18385,14 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -15911,8 +18403,12 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -15923,8 +18419,12 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -15935,8 
+18435,13 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -15947,8 +18452,13 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -15963,34 +18473,34 @@ "supports_tool_choice": true }, "mistral.mistral-large-2402-v1:0": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_function_calling": true }, "mistral.mistral-large-2407-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 9e-6, + "output_cost_per_token": 0.000009, "supports_function_calling": true, "supports_tool_choice": true }, "mistral.mistral-small-2402-v1:0": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "bedrock", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true }, "mistral.mixtral-8x7b-instruct-v0:1": { @@ -16004,25 +18514,25 @@ "supports_tool_choice": true }, "mistral/codestral-2405": { - "input_cost_per_token": 1e-6, + 
"input_cost_per_token": 0.000001, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_assistant_prefill": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/codestral-latest": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_assistant_prefill": true, "supports_response_schema": true, "supports_tool_choice": true @@ -16046,7 +18556,7 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://mistral.ai/news/devstral", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16082,13 +18592,13 @@ "supports_tool_choice": true }, "mistral/magistral-medium-2506": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 40000, "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://mistral.ai/news/magistral", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16097,13 +18607,13 @@ "supports_tool_choice": true }, "mistral/magistral-medium-2509": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 40000, "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://mistral.ai/news/magistral", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16113,28 +18623,32 @@ }, "mistral/mistral-ocr-latest": { 
"litellm_provider": "mistral", - "ocr_cost_per_page": 1e-3, - "annotation_cost_per_page": 3e-3, + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/mistral-ocr-2505-completion": { "litellm_provider": "mistral", - "ocr_cost_per_page": 1e-3, - "annotation_cost_per_page": 3e-3, + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/magistral-medium-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 40000, "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://mistral.ai/news/magistral", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16149,7 +18663,7 @@ "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://mistral.ai/pricing#api-pricing", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16164,7 +18678,7 @@ "max_output_tokens": 40000, "max_tokens": 40000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://mistral.ai/pricing#api-pricing", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -16180,91 +18694,106 @@ "mode": "embedding" }, "mistral/codestral-embed": { - "input_cost_per_token": 0.15e-6, + "input_cost_per_token": 1.5e-7, "litellm_provider": "mistral", "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding" }, "mistral/codestral-embed-2505": { - "input_cost_per_token": 0.15e-6, + "input_cost_per_token": 
1.5e-7, "litellm_provider": "mistral", "max_input_tokens": 8192, "max_tokens": 8192, "mode": "embedding" }, "mistral/mistral-large-2402": { - "input_cost_per_token": 4e-6, + "input_cost_per_token": 0.000004, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/mistral-large-2407": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 9e-6, + "output_cost_per_token": 0.000009, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/mistral-large-2411": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/mistral-large-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/mistral-large-3": { + "input_cost_per_token": 5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 8191, + 
"max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "mistral/mistral-medium": { - "input_cost_per_token": 2.7e-6, + "input_cost_per_token": 0.0000027, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 8.1e-6, + "output_cost_per_token": 0.0000081, "supports_assistant_prefill": true, "supports_response_schema": true, "supports_tool_choice": true }, "mistral/mistral-medium-2312": { - "input_cost_per_token": 2.7e-6, + "input_cost_per_token": 0.0000027, "litellm_provider": "mistral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 8.1e-6, + "output_cost_per_token": 0.0000081, "supports_assistant_prefill": true, "supports_response_schema": true, "supports_tool_choice": true @@ -16276,7 +18805,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16289,7 +18818,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16384,13 +18913,13 @@ "supports_tool_choice": true }, "mistral/open-mixtral-8x22b": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 65336, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, 
"supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16424,13 +18953,13 @@ "supports_vision": true }, "mistral/pixtral-large-2411": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16438,13 +18967,13 @@ "supports_vision": true }, "mistral/pixtral-large-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "mistral", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, @@ -16459,7 +18988,35 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-0905-preview": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-turbo-preview": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 0.00000115, + "litellm_provider": "moonshot", + 
"max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000008, "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_function_calling": true, "supports_tool_choice": true, @@ -16467,13 +19024,13 @@ }, "moonshot/kimi-latest": { "cache_read_input_token_cost": 1.5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, @@ -16481,13 +19038,13 @@ }, "moonshot/kimi-latest-128k": { "cache_read_input_token_cost": 1.5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, @@ -16495,13 +19052,13 @@ }, "moonshot/kimi-latest-32k": { "cache_read_input_token_cost": 1.5e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "moonshot", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, @@ -16515,21 +19072,22 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, 
"supports_tool_choice": true, "supports_vision": true }, "moonshot/kimi-thinking-preview": { - "input_cost_per_token": 3e-5, + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 6e-7, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 3e-5, - "source": "https://platform.moonshot.ai/docs/pricing", + "output_cost_per_token": 0.0000025, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_vision": true }, "moonshot/kimi-k2-thinking": { @@ -16540,81 +19098,95 @@ "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-thinking-turbo": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 0.00000115, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000008, "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "moonshot/moonshot-v1-128k": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "moonshot/moonshot-v1-128k-0430": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", 
"max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "moonshot/moonshot-v1-128k-vision-preview": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "moonshot/moonshot-v1-32k": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "moonshot", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "moonshot/moonshot-v1-32k-0430": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "moonshot", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "moonshot/moonshot-v1-32k-vision-preview": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "moonshot", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": 
true, @@ -16627,7 +19199,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -16639,7 +19211,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -16651,20 +19223,20 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "moonshot/moonshot-v1-auto": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://platform.moonshot.ai/docs/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -16676,7 +19248,7 @@ "max_output_tokens": 16000, "max_tokens": 16000, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "supports_function_calling": false, "supports_parallel_function_calling": false, "supports_system_messages": true, @@ -16690,7 +19262,7 @@ "max_output_tokens": 16000, "max_tokens": 16000, "mode": "chat", - "output_cost_per_token": 1.9e-6, + "output_cost_per_token": 0.0000019, "supports_function_calling": false, "supports_parallel_function_calling": false, "supports_system_messages": true, @@ -16711,8 +19283,14 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - 
"supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image", "video"] + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image", + "video" + ] }, "multimodalembedding@001": { "input_cost_per_character": 2e-7, @@ -16728,8 +19306,14 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image", "video"] + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image", + "video" + ] }, "nscale/Qwen/QwQ-32B": { "input_cost_per_token": 1.8e-7, @@ -16763,9 +19347,11 @@ "input_cost_per_pixel": 1.3e-9, "litellm_provider": "nscale", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 3.75e-7, @@ -16868,19 +19454,21 @@ "input_cost_per_pixel": 3e-9, "litellm_provider": "nscale", "mode": "image_generation", - "output_cost_per_pixel": 0.0, + "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "o1": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, 
"supports_pdf_input": true, @@ -16892,14 +19480,14 @@ "supports_vision": true }, "o1-2024-12-17": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -16912,55 +19500,55 @@ }, "o1-mini": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_vision": true }, "o1-mini-2024-09-12": { "deprecation_date": "2025-10-27", - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 3e-6, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000003, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_vision": true }, "o1-preview": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_vision": true }, 
"o1-preview-2024-09-12": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, @@ -16968,7 +19556,7 @@ }, "o1-pro": { "input_cost_per_token": 0.00015, - "input_cost_per_token_batches": 7.5e-5, + "input_cost_per_token_batches": 0.000075, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, @@ -16976,9 +19564,17 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -16992,7 +19588,7 @@ }, "o1-pro-2025-03-19": { "input_cost_per_token": 0.00015, - "input_cost_per_token_batches": 7.5e-5, + "input_cost_per_token_batches": 0.000075, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, @@ -17000,9 +19596,17 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, 
"supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -17018,25 +19622,30 @@ "cache_read_input_token_cost": 5e-7, "cache_read_input_token_cost_flex": 2.5e-7, "cache_read_input_token_cost_priority": 8.75e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_flex": 1e-6, - "input_cost_per_token_priority": 3.5e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_flex": 0.000001, + "input_cost_per_token_priority": 0.0000035, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, - "output_cost_per_token_flex": 4e-6, - "output_cost_per_token_priority": 1.4e-5, + "output_cost_per_token": 0.000008, + "output_cost_per_token_flex": 0.000004, + "output_cost_per_token_priority": 0.000014, "supported_endpoints": [ "/v1/responses", "/v1/chat/completions", "/v1/completions", "/v1/batch" ], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17049,21 +19658,26 @@ }, "o3-2025-04-16": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supported_endpoints": [ "/v1/responses", "/v1/chat/completions", "/v1/completions", "/v1/batch" ], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17075,19 
+19689,28 @@ "supports_vision": true }, "o3-deep-research": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_token": 1e-5, - "input_cost_per_token_batches": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.000005, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 4e-5, - "output_cost_per_token_batches": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00004, + "output_cost_per_token_batches": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -17099,19 +19722,28 @@ "supports_vision": true }, "o3-deep-research-2025-06-26": { - "cache_read_input_token_cost": 2.5e-6, - "input_cost_per_token": 1e-5, - "input_cost_per_token_batches": 5e-6, + "cache_read_input_token_cost": 0.0000025, + "input_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.000005, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 4e-5, - "output_cost_per_token_batches": 2e-5, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00004, + "output_cost_per_token_batches": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + 
"supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -17124,13 +19756,13 @@ }, "o3-mini": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -17141,13 +19773,13 @@ }, "o3-mini-2025-01-31": { "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -17157,18 +19789,26 @@ "supports_vision": false }, "o3-pro": { - "input_cost_per_token": 2e-5, - "input_cost_per_token_batches": 1e-5, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-5, - "output_cost_per_token_batches": 4e-5, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00008, + "output_cost_per_token_batches": 0.00004, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, 
"supports_pdf_input": true, @@ -17179,18 +19819,26 @@ "supports_vision": true }, "o3-pro-2025-06-10": { - "input_cost_per_token": 2e-5, - "input_cost_per_token_batches": 1e-5, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-5, - "output_cost_per_token_batches": 4e-5, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00008, + "output_cost_per_token_batches": 0.00004, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17204,17 +19852,17 @@ "cache_read_input_token_cost": 2.75e-7, "cache_read_input_token_cost_flex": 1.375e-7, "cache_read_input_token_cost_priority": 5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "input_cost_per_token_flex": 5.5e-7, - "input_cost_per_token_priority": 2e-6, + "input_cost_per_token_priority": 0.000002, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, - "output_cost_per_token_flex": 2.2e-6, - "output_cost_per_token_priority": 8e-6, + "output_cost_per_token": 0.0000044, + "output_cost_per_token_flex": 0.0000022, + "output_cost_per_token_priority": 0.000008, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17227,13 +19875,13 @@ }, "o4-mini-2025-04-16": { "cache_read_input_token_cost": 2.75e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openai", 
"max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17246,18 +19894,27 @@ }, "o4-mini-deep-research": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -17270,18 +19927,27 @@ }, "o4-mini-deep-research-2025-06-26": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "openai", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "responses", - "output_cost_per_token": 8e-6, - "output_cost_per_token_batches": 4e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000008, + "output_cost_per_token_batches": 0.000004, + 
"supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -17293,25 +19959,25 @@ "supports_vision": true }, "oci/meta.llama-3.1-405b-instruct": { - "input_cost_per_token": 1.068e-5, + "input_cost_per_token": 0.00001068, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.068e-5, + "output_cost_per_token": 0.00001068, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false }, "oci/meta.llama-3.2-90b-vision-instruct": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false @@ -17353,7 +20019,7 @@ "supports_response_schema": false }, "oci/xai.grok-3": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "oci", "max_input_tokens": 131072, "max_output_tokens": 131072, @@ -17365,13 +20031,13 @@ "supports_response_schema": false }, "oci/xai.grok-3-fast": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "oci", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-5, + "output_cost_per_token": 0.000025, "source": 
"https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false @@ -17395,13 +20061,13 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false }, "oci/xai.grok-4": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 128000, @@ -17413,345 +20079,345 @@ "supports_response_schema": false }, "oci/cohere.command-latest": { - "input_cost_per_token": 1.56e-6, + "input_cost_per_token": 0.00000156, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.56e-6, + "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", "supports_function_calling": true, "supports_response_schema": false }, "oci/cohere.command-a-03-2025": { - "input_cost_per_token": 1.56e-6, + "input_cost_per_token": 0.00000156, "litellm_provider": "oci", "max_input_tokens": 256000, "max_output_tokens": 4000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.56e-6, + "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", "supports_function_calling": true, "supports_response_schema": false }, "oci/cohere.command-plus-latest": { - "input_cost_per_token": 1.56e-6, + "input_cost_per_token": 0.00000156, "litellm_provider": "oci", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.56e-6, + "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", 
"supports_function_calling": true, "supports_response_schema": false }, "ollama/codegeex4": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": false }, "ollama/codegemma": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/codellama": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/deepseek-coder-v2-base": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/deepseek-coder-v2-instruct": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/deepseek-coder-v2-lite-base": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/deepseek-coder-v2-lite-instruct": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, 
"litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/deepseek-v3.1:671b-cloud": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 163840, "max_output_tokens": 163840, "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/gpt-oss:120b-cloud": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/gpt-oss:20b-cloud": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/internlm2_5-20b-chat": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/llama2": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama2-uncensored": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + 
"output_cost_per_token": 0 }, "ollama/llama2:13b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama2:70b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama2:7b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama3": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama3.1": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/llama3:70b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/llama3:8b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/mistral": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, 
"litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mistral-7B-Instruct-v0.1": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mistral-7B-Instruct-v0.2": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mistral-large-instruct-2407": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 65536, "max_output_tokens": 8192, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mixtral-8x22B-Instruct-v0.1": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 65536, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/mixtral-8x7B-Instruct-v0.1": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/orca-mini": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 
4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "ollama/qwen3-coder:480b-cloud": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 262144, "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_function_calling": true }, "ollama/vicuna": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "ollama", "max_input_tokens": 2048, "max_output_tokens": 2048, "max_tokens": 2048, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "omni-moderation-2024-09-26": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "omni-moderation-latest": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "omni-moderation-latest-intents": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "openai.gpt-oss-120b-1:0": { "input_cost_per_token": 1.5e-7, @@ -17780,31 +20446,31 @@ "supports_tool_choice": true }, "openrouter/anthropic/claude-2": { - "input_cost_per_token": 1.102e-5, + "input_cost_per_token": 0.00001102, "litellm_provider": "openrouter", "max_output_tokens": 8191, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 3.268e-5, + "output_cost_per_token": 0.00003268, "supports_tool_choice": true }, "openrouter/anthropic/claude-3-5-haiku": { - 
"input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_function_calling": true, "supports_tool_choice": true }, "openrouter/anthropic/claude-3-5-haiku-20241022": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_function_calling": true, "supports_tool_choice": true, "tool_use_system_prompt_tokens": 264 @@ -17815,7 +20481,7 @@ "litellm_provider": "openrouter", "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true @@ -17827,20 +20493,20 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 264 }, "openrouter/anthropic/claude-3-opus": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, @@ -17848,23 +20514,23 @@ }, "openrouter/anthropic/claude-3-sonnet": { "input_cost_per_image": 0.0048, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, 
"supports_tool_choice": true, "supports_vision": true }, "openrouter/anthropic/claude-3.5-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17873,13 +20539,13 @@ "tool_use_system_prompt_tokens": 159 }, "openrouter/anthropic/claude-3.5-sonnet:beta": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_computer_use": true, "supports_function_calling": true, "supports_tool_choice": true, @@ -17888,13 +20554,13 @@ }, "openrouter/anthropic/claude-3.7-sonnet": { "input_cost_per_image": 0.0048, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17905,13 +20571,13 @@ }, "openrouter/anthropic/claude-3.7-sonnet:beta": { "input_cost_per_image": 0.0048, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_computer_use": true, "supports_function_calling": true, "supports_reasoning": true, @@ -17920,25 +20586,25 @@ "tool_use_system_prompt_tokens": 159 }, 
"openrouter/anthropic/claude-instant-v1": { - "input_cost_per_token": 1.63e-6, + "input_cost_per_token": 0.00000163, "litellm_provider": "openrouter", "max_output_tokens": 8191, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 5.51e-6, + "output_cost_per_token": 0.00000551, "supports_tool_choice": true }, "openrouter/anthropic/claude-opus-4": { "input_cost_per_image": 0.0048, - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17950,16 +20616,16 @@ }, "openrouter/anthropic/claude-opus-4.1": { "input_cost_per_image": 0.0048, - "cache_creation_input_token_cost": 1.875e-5, - "cache_creation_input_token_cost_above_1hr": 3e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17971,19 +20637,38 @@ }, "openrouter/anthropic/claude-sonnet-4": { "input_cost_per_image": 0.0048, - "cache_creation_input_token_cost": 3.75e-6, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_creation_input_token_cost": 0.00000375, + 
"cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, "litellm_provider": "openrouter", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000025, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -17995,19 +20680,19 @@ }, "openrouter/anthropic/claude-sonnet-4.5": { "input_cost_per_image": 0.0048, - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 
0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "openrouter", "max_input_tokens": 1000000, "max_output_tokens": 1000000, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -18018,15 +20703,15 @@ "tool_use_system_prompt_tokens": 159 }, "openrouter/anthropic/claude-haiku-4.5": { - "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost": 0.00000125, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 200000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -18056,11 +20741,11 @@ "supports_tool_choice": true }, "openrouter/cohere/command-r-plus": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_tool_choice": true }, "openrouter/databricks/dbrx-instruct": { @@ -18108,6 +20793,21 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "openrouter/deepseek/deepseek-v3.2": { + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "openrouter/deepseek/deepseek-v3.2-exp": { "input_cost_per_token": 2e-7, 
"input_cost_per_token_cache_hit": 2e-8, @@ -18142,7 +20842,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -18157,7 +20857,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.15e-6, + "output_cost_per_token": 0.00000215, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -18208,7 +20908,7 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "supports_audio_output": true, "supports_function_calling": true, "supports_response_schema": true, @@ -18218,7 +20918,7 @@ }, "openrouter/google/gemini-2.5-pro": { "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -18230,7 +20930,7 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_audio_output": true, "supports_function_calling": true, "supports_response_schema": true, @@ -18242,9 +20942,9 @@ "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, - "input_cost_per_token": 2e-6, - "input_cost_per_token_above_200k_tokens": 4e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "openrouter", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -18256,12 +20956,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - 
"output_cost_per_token": 1.2e-5, - "output_cost_per_token_above_200k_tokens": 1.8e-5, - "output_cost_per_token_batches": 6e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -18276,13 +20987,13 @@ }, "openrouter/google/gemini-pro-1.5": { "input_cost_per_image": 0.00265, - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openrouter", "max_input_tokens": 1000000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.5e-6, + "output_cost_per_token": 0.0000075, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true @@ -18315,27 +21026,27 @@ "supports_tool_choice": true }, "openrouter/gryphe/mythomax-l2-13b": { - "input_cost_per_token": 1.875e-6, + "input_cost_per_token": 0.000001875, "litellm_provider": "openrouter", "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.875e-6, + "output_cost_per_token": 0.000001875, "supports_tool_choice": true }, "openrouter/jondurbin/airoboros-l2-70b-2.1": { - "input_cost_per_token": 1.3875e-5, + "input_cost_per_token": 0.000013875, "litellm_provider": "openrouter", "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.3875e-5, + "output_cost_per_token": 0.000013875, "supports_tool_choice": true }, "openrouter/mancer/weaver": { - "input_cost_per_token": 5.625e-6, + "input_cost_per_token": 0.000005625, 
"litellm_provider": "openrouter", "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 5.625e-6, + "output_cost_per_token": 0.000005625, "supports_tool_choice": true }, "openrouter/meta-llama/codellama-34b-instruct": { @@ -18355,11 +21066,11 @@ "supports_tool_choice": true }, "openrouter/meta-llama/llama-2-70b-chat": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openrouter", "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "supports_tool_choice": true }, "openrouter/meta-llama/llama-3-70b-instruct": { @@ -18383,23 +21094,23 @@ "litellm_provider": "openrouter", "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 2.25e-6, + "output_cost_per_token": 0.00000225, "supports_tool_choice": true }, "openrouter/meta-llama/llama-3-8b-instruct:free": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openrouter", "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "supports_tool_choice": true }, "openrouter/microsoft/wizardlm-2-8x22b:nitro": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "supports_tool_choice": true }, "openrouter/minimax/minimax-m2": { @@ -18409,7 +21120,7 @@ "max_output_tokens": 204800, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.02e-6, + "output_cost_per_token": 0.00000102, "supports_function_calling": true, "supports_prompt_caching": false, "supports_reasoning": true, @@ -18424,19 +21135,19 @@ "supports_tool_choice": true }, "openrouter/mistralai/mistral-7b-instruct:free": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openrouter", "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, 
"supports_tool_choice": true }, "openrouter/mistralai/mistral-large": { - "input_cost_per_token": 8e-6, + "input_cost_per_token": 0.000008, "litellm_provider": "openrouter", "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 2.4e-5, + "output_cost_per_token": 0.000024, "supports_tool_choice": true }, "openrouter/mistralai/mistral-small-3.1-24b-instruct": { @@ -18472,49 +21183,49 @@ "supports_tool_choice": true }, "openrouter/openai/gpt-3.5-turbo": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "openrouter", "max_tokens": 4095, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_tool_choice": true }, "openrouter/openai/gpt-3.5-turbo-16k": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_tokens": 16383, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_tool_choice": true }, "openrouter/openai/gpt-4": { - "input_cost_per_token": 3e-5, + "input_cost_per_token": 0.00003, "litellm_provider": "openrouter", "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_tool_choice": true }, "openrouter/openai/gpt-4-vision-preview": { "input_cost_per_image": 0.01445, - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "openrouter", "max_tokens": 130000, "mode": "chat", - "output_cost_per_token": 3e-5, + "output_cost_per_token": 0.00003, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "openrouter/openai/gpt-4.1": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "openrouter", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_function_calling": 
true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18525,13 +21236,13 @@ }, "openrouter/openai/gpt-4.1-2025-04-14": { "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "openrouter", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18548,7 +21259,7 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18565,7 +21276,7 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.6e-6, + "output_cost_per_token": 0.0000016, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18609,26 +21320,26 @@ "supports_vision": true }, "openrouter/openai/gpt-4o": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "openrouter/openai/gpt-4o-2024-05-13": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, 
"supports_tool_choice": true, @@ -18636,43 +21347,58 @@ }, "openrouter/openai/gpt-5-chat": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, "openrouter/openai/gpt-5-codex": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, "openrouter/openai/gpt-5": { "cache_read_input_token_cost": 1.25e-7, - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.00001, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -18684,9 +21410,14 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, - "supported_modalities": ["text", 
"image"], - "supported_output_modalities": ["text"], + "output_cost_per_token": 0.000002, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -18699,8 +21430,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -18735,14 +21471,14 @@ "supports_tool_choice": true }, "openrouter/openai/o1": { - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": true, @@ -18752,65 +21488,65 @@ "supports_vision": true }, "openrouter/openai/o1-mini": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": false }, "openrouter/openai/o1-mini-2024-09-12": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-5, + "output_cost_per_token": 0.000012, "supports_function_calling": true, 
"supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": false }, "openrouter/openai/o1-preview": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": false }, "openrouter/openai/o1-preview-2024-09-12": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6e-5, + "output_cost_per_token": 0.00006, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true, "supports_vision": false }, "openrouter/openai/o3-mini": { - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_reasoning": true, @@ -18818,13 +21554,13 @@ "supports_vision": false }, "openrouter/openai/o3-mini-high": { - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 4.4e-6, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_reasoning": true, @@ -18832,11 +21568,11 @@ "supports_vision": false }, "openrouter/pygmalionai/mythalion-13b": { - "input_cost_per_token": 1.875e-6, + 
"input_cost_per_token": 0.000001875, "litellm_provider": "openrouter", "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.875e-6, + "output_cost_per_token": 0.000001875, "supports_tool_choice": true }, "openrouter/qwen/qwen-2.5-coder-32b-instruct": { @@ -18879,26 +21615,26 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 3.4e-6, + "output_cost_per_token": 0.0000034, "source": "https://openrouter.ai/switchpoint/router", "supports_tool_choice": true }, "openrouter/undi95/remm-slerp-l2-13b": { - "input_cost_per_token": 1.875e-6, + "input_cost_per_token": 0.000001875, "litellm_provider": "openrouter", "max_tokens": 6144, "mode": "chat", - "output_cost_per_token": 1.875e-6, + "output_cost_per_token": 0.000001875, "supports_tool_choice": true }, "openrouter/x-ai/grok-4": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://openrouter.ai/x-ai/grok-4", "supports_function_calling": true, "supports_reasoning": true, @@ -18920,13 +21656,13 @@ "supports_web_search": false }, "openrouter/z-ai/glm-4.6": { - "input_cost_per_token": 4.0e-7, + "input_cost_per_token": 4e-7, "litellm_provider": "openrouter", "max_input_tokens": 202800, "max_output_tokens": 131000, "max_tokens": 202800, "mode": "chat", - "output_cost_per_token": 1.75e-6, + "output_cost_per_token": 0.00000175, "source": "https://openrouter.ai/z-ai/glm-4.6", "supports_function_calling": true, "supports_reasoning": true, @@ -18939,7 +21675,7 @@ "max_output_tokens": 131000, "max_tokens": 202800, "mode": "chat", - "output_cost_per_token": 1.9e-6, + "output_cost_per_token": 0.0000019, "source": "https://openrouter.ai/z-ai/glm-4.6:exacto", "supports_function_calling": true, "supports_reasoning": true, @@ -19224,7 +21960,7 @@ 
"max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.4e-6 + "output_cost_per_token": 0.0000014 }, "perplexity/codellama-70b-instruct": { "input_cost_per_token": 7e-7, @@ -19233,7 +21969,7 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 2.8e-6 + "output_cost_per_token": 0.0000028 }, "perplexity/llama-2-70b-chat": { "input_cost_per_token": 7e-7, @@ -19242,16 +21978,16 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.8e-6 + "output_cost_per_token": 0.0000028 }, "perplexity/llama-3.1-70b-instruct": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "perplexity/llama-3.1-8b-instruct": { "input_cost_per_token": 2e-7, @@ -19264,33 +22000,33 @@ }, "perplexity/llama-3.1-sonar-huge-128k-online": { "deprecation_date": "2025-02-22", - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "perplexity", "max_input_tokens": 127072, "max_output_tokens": 127072, "max_tokens": 127072, "mode": "chat", - "output_cost_per_token": 5e-6 + "output_cost_per_token": 0.000005 }, "perplexity/llama-3.1-sonar-large-128k-chat": { "deprecation_date": "2025-02-22", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "perplexity/llama-3.1-sonar-large-128k-online": { "deprecation_date": "2025-02-22", - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 127072, "max_output_tokens": 127072, "max_tokens": 127072, "mode": "chat", - 
"output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "perplexity/llama-3.1-sonar-small-128k-chat": { "deprecation_date": "2025-02-22", @@ -19337,17 +22073,17 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.8e-6 + "output_cost_per_token": 0.0000028 }, "perplexity/pplx-70b-online": { "input_cost_per_request": 0.005, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "perplexity", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.8e-6 + "output_cost_per_token": 0.0000028 }, "perplexity/pplx-7b-chat": { "input_cost_per_token": 7e-8, @@ -19360,7 +22096,7 @@ }, "perplexity/pplx-7b-online": { "input_cost_per_request": 0.005, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "perplexity", "max_input_tokens": 4096, "max_output_tokens": 4096, @@ -19369,12 +22105,12 @@ "output_cost_per_token": 2.8e-7 }, "perplexity/sonar": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "search_context_cost_per_query": { "search_context_size_high": 0.012, "search_context_size_low": 0.005, @@ -19383,14 +22119,14 @@ "supports_web_search": true }, "perplexity/sonar-deep-research": { - "citation_cost_per_token": 2e-6, - "input_cost_per_token": 2e-6, + "citation_cost_per_token": 0.000002, + "input_cost_per_token": 0.000002, "litellm_provider": "perplexity", "max_input_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_reasoning_token": 3e-6, - "output_cost_per_token": 8e-6, + "output_cost_per_reasoning_token": 0.000003, + "output_cost_per_token": 0.000008, "search_context_cost_per_query": { "search_context_size_high": 0.005, "search_context_size_low": 0.005, @@ -19406,7 +22142,7 @@ 
"max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1.8e-6 + "output_cost_per_token": 0.0000018 }, "perplexity/sonar-medium-online": { "input_cost_per_request": 0.005, @@ -19416,16 +22152,16 @@ "max_output_tokens": 12000, "max_tokens": 12000, "mode": "chat", - "output_cost_per_token": 1.8e-6 + "output_cost_per_token": 0.0000018 }, "perplexity/sonar-pro": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "perplexity", "max_input_tokens": 200000, "max_output_tokens": 8000, "max_tokens": 8000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.014, "search_context_size_low": 0.006, @@ -19434,12 +22170,12 @@ "supports_web_search": true }, "perplexity/sonar-reasoning": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "perplexity", "max_input_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "search_context_cost_per_query": { "search_context_size_high": 0.014, "search_context_size_low": 0.005, @@ -19449,12 +22185,12 @@ "supports_web_search": true }, "perplexity/sonar-reasoning-pro": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "perplexity", "max_input_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "search_context_cost_per_query": { "search_context_size_high": 0.014, "search_context_size_low": 0.006, @@ -19482,6 +22218,116 @@ "mode": "chat", "output_cost_per_token": 2.8e-7 }, + "publicai/swiss-ai/apertus-8b-instruct": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + 
"supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/swiss-ai/apertus-70b-instruct": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/aisingapore/Gemma-SEA-LION-v4-27B-IT": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/BSC-LT/salamandra-7b-instruct-tools-16k": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/BSC-LT/ALIA-40b-instruct_Q8_0": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/allenai/Olmo-3-7B-Instruct": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/aisingapore/Qwen-SEA-LION-v4-32B-IT": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + 
"max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/allenai/Olmo-3-7B-Think": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "publicai/allenai/Olmo-3-32B-Think": { + "input_cost_per_token": 0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, "qwen.qwen3-coder-480b-a35b-v1:0": { "input_cost_per_token": 2.2e-7, "litellm_provider": "bedrock_converse", @@ -19489,7 +22335,7 @@ "max_output_tokens": 65536, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.8e-6, + "output_cost_per_token": 0.0000018, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true @@ -19513,7 +22359,7 @@ "max_output_tokens": 131072, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 6.0e-7, + "output_cost_per_token": 6e-7, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true @@ -19525,7 +22371,7 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 6.0e-7, + "output_cost_per_token": 6e-7, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true @@ -19535,14 +22381,18 @@ "mode": "image_generation", "output_cost_per_image": 0.022, "source": 
"https://www.recraft.ai/docs#pricing", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "recraft/recraftv3": { "litellm_provider": "recraft", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "replicate/meta/llama-2-13b": { "input_cost_per_token": 1e-7, @@ -19571,7 +22421,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.75e-6, + "output_cost_per_token": 0.00000275, "supports_tool_choice": true }, "replicate/meta/llama-2-70b-chat": { @@ -19581,7 +22431,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 2.75e-6, + "output_cost_per_token": 0.00000275, "supports_tool_choice": true }, "replicate/meta/llama-2-7b": { @@ -19611,7 +22461,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.75e-6, + "output_cost_per_token": 0.00000275, "supports_tool_choice": true }, "replicate/meta/llama-3-70b-instruct": { @@ -19621,7 +22471,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.75e-6, + "output_cost_per_token": 0.00000275, "supports_tool_choice": true }, "replicate/meta/llama-3-8b": { @@ -19671,140 +22521,140 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "supports_tool_choice": true }, "rerank-english-v2.0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "rerank-english-v3.0": { "input_cost_per_query": 0.002, - 
"input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "rerank-multilingual-v2.0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "rerank-multilingual-v3.0": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "rerank-v3.5": { "input_cost_per_query": 0.002, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "cohere", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, "max_tokens": 4096, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "nvidia_nim/nvidia/nv-rerankqa-mistral-4b-v3": { - "input_cost_per_query": 0.0, - "input_cost_per_token": 0.0, + "input_cost_per_query": 0, + "input_cost_per_token": 0, "litellm_provider": "nvidia_nim", "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "nvidia_nim/nvidia/llama-3_2-nv-rerankqa-1b-v2": { - "input_cost_per_query": 0.0, - "input_cost_per_token": 0.0, + "input_cost_per_query": 0, + "input_cost_per_token": 0, "litellm_provider": "nvidia_nim", "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-13b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, 
"max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-13b-f": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-70b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-70b-b-f": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-7b": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "completion", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sagemaker/meta-textgeneration-llama-2-7b-f": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "sagemaker", "max_input_tokens": 4096, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "sambanova/DeepSeek-R1": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "sambanova", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 7e-6, + "output_cost_per_token": 0.000007, "source": "https://cloud.sambanova.ai/plans/pricing" }, "sambanova/DeepSeek-R1-Distill-Llama-70B": 
{ @@ -19814,17 +22664,17 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.4e-6, + "output_cost_per_token": 0.0000014, "source": "https://cloud.sambanova.ai/plans/pricing" }, "sambanova/DeepSeek-V3-0324": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "sambanova", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 4.5e-6, + "output_cost_per_token": 0.0000045, "source": "https://cloud.sambanova.ai/plans/pricing", "supports_function_calling": true, "supports_reasoning": true, @@ -19840,7 +22690,7 @@ "notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount" }, "mode": "chat", - "output_cost_per_token": 1.8e-6, + "output_cost_per_token": 0.0000018, "source": "https://cloud.sambanova.ai/plans/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -19864,13 +22714,13 @@ "supports_tool_choice": true }, "sambanova/Meta-Llama-3.1-405B-Instruct": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "sambanova", "max_input_tokens": 16384, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "source": "https://cloud.sambanova.ai/plans/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -19916,7 +22766,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://cloud.sambanova.ai/plans/pricing", "supports_function_calling": true, "supports_response_schema": true, @@ -19939,7 +22789,7 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "source": "https://cloud.sambanova.ai/plans/pricing" }, 
"sambanova/Qwen2-Audio-7B-Instruct": { @@ -19970,8 +22820,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 3e-6, - "output_cost_per_token": 4.5e-6, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.0000045, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -19983,8 +22833,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-6, - "output_cost_per_token": 4.5e-6, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.0000045, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -19992,7 +22842,6 @@ "supports_reasoning": true, "source": "https://cloud.sambanova.ai/plans/pricing" }, - "snowflake/claude-3-5-sonnet": { "litellm_provider": "snowflake", "max_input_tokens": 18000, @@ -20209,19 +23058,19 @@ "input_cost_per_pixel": 3.81469e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "standard/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 4.359e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "standard/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 4.359e-8, "litellm_provider": "openai", "mode": "image_generation", - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0 }, "tavily/search": { "input_cost_per_query": 0.008, @@ -20288,23 +23137,23 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-completion-codestral/codestral-2405": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "text-completion-codestral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": 
"https://docs.mistral.ai/capabilities/code_generation/" }, "text-completion-codestral/codestral-latest": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "text-completion-codestral", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "completion", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://docs.mistral.ai/capabilities/code_generation/" }, "text-embedding-004": { @@ -20336,8 +23185,8 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0, - "output_cost_per_token_batches": 0.0, + "output_cost_per_token": 0, + "output_cost_per_token_batches": 0, "output_vector_size": 3072 }, "text-embedding-3-small": { @@ -20347,8 +23196,8 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0, - "output_cost_per_token_batches": 0.0, + "output_cost_per_token": 0, + "output_cost_per_token_batches": 0, "output_vector_size": 1536 }, "text-embedding-ada-002": { @@ -20357,7 +23206,7 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 1536 }, "text-embedding-ada-002-v2": { @@ -20367,8 +23216,8 @@ "max_input_tokens": 8191, "max_tokens": 8191, "mode": "embedding", - "output_cost_per_token": 0.0, - "output_cost_per_token_batches": 0.0 + "output_cost_per_token": 0, + "output_cost_per_token_batches": 0 }, "text-embedding-large-exp-03-07": { "input_cost_per_character": 2.5e-8, @@ -20393,31 +23242,31 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "text-moderation-007": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "text-moderation-latest": { - "input_cost_per_token": 0.0, + 
"input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "text-moderation-stable": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "openai", "max_input_tokens": 32768, "max_output_tokens": 0, "max_tokens": 32768, "mode": "moderation", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "text-multilingual-embedding-002": { "input_cost_per_character": 2.5e-8, @@ -20441,23 +23290,23 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-unicorn": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "vertex_ai-text-models", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "completion", - "output_cost_per_token": 2.8e-5, + "output_cost_per_token": 0.000028, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-unicorn@001": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "vertex_ai-text-models", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 1024, "mode": "completion", - "output_cost_per_token": 2.8e-5, + "output_cost_per_token": 0.000028, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "textembedding-gecko": { @@ -20541,29 +23390,29 @@ "output_cost_per_token": 3e-7 }, "together-ai-81.1b-110b": { - "input_cost_per_token": 1.8e-6, + "input_cost_per_token": 0.0000018, "litellm_provider": "together_ai", "mode": "chat", - "output_cost_per_token": 1.8e-6 + "output_cost_per_token": 0.0000018 }, "together-ai-embedding-151m-to-350m": { "input_cost_per_token": 1.6e-8, "litellm_provider": "together_ai", "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, 
"together-ai-embedding-up-to-150m": { "input_cost_per_token": 8e-9, "litellm_provider": "together_ai", "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "together_ai/baai/bge-base-en-v1.5": { "input_cost_per_token": 8e-9, "litellm_provider": "together_ai", "max_input_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 768 }, "together_ai/BAAI/bge-base-en-v1.5": { @@ -20571,7 +23420,7 @@ "litellm_provider": "together_ai", "max_input_tokens": 512, "mode": "embedding", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "output_vector_size": 768 }, "together-ai-up-to-4b": { @@ -20599,7 +23448,7 @@ "litellm_provider": "together_ai", "max_input_tokens": 262000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "source": "https://www.together.ai/models/qwen3-235b-a22b-instruct-2507-fp8", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20610,7 +23459,7 @@ "litellm_provider": "together_ai", "max_input_tokens": 256000, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://www.together.ai/models/qwen3-235b-a22b-thinking-2507", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20628,24 +23477,24 @@ "supports_tool_choice": false }, "together_ai/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "together_ai", "max_input_tokens": 256000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "source": "https://www.together.ai/models/qwen3-coder-480b-a35b-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-R1": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": 
"together_ai", "max_input_tokens": 128000, "max_output_tokens": 20480, "max_tokens": 20480, "mode": "chat", - "output_cost_per_token": 7e-6, + "output_cost_per_token": 0.000007, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -20655,20 +23504,20 @@ "litellm_provider": "together_ai", "max_input_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.19e-6, + "output_cost_per_token": 0.00000219, "source": "https://www.together.ai/models/deepseek-r1-0528-throughput", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-V3": { - "input_cost_per_token": 1.25e-6, + "input_cost_per_token": 0.00000125, "litellm_provider": "together_ai", "max_input_tokens": 65536, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -20678,7 +23527,7 @@ "litellm_provider": "together_ai", "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.7e-6, + "output_cost_per_token": 0.0000017, "source": "https://www.together.ai/models/deepseek-v3-1", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20731,10 +23580,10 @@ "supports_tool_choice": true }, "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { - "input_cost_per_token": 3.5e-6, + "input_cost_per_token": 0.0000035, "litellm_provider": "together_ai", "mode": "chat", - "output_cost_per_token": 3.5e-6, + "output_cost_per_token": 0.0000035, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -20785,10 +23634,10 @@ "supports_tool_choice": true }, "together_ai/moonshotai/Kimi-K2-Instruct": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": 
"together_ai", "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://www.together.ai/models/kimi-k2-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20828,20 +23677,20 @@ "litellm_provider": "together_ai", "max_input_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-6, + "output_cost_per_token": 0.0000011, "source": "https://www.together.ai/models/glm-4-5-air", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "together_ai/zai-org/GLM-4.6": { - "input_cost_per_token": 0.6e-6, + "input_cost_per_token": 6e-7, "litellm_provider": "together_ai", "max_input_tokens": 200000, "max_output_tokens": 200000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 2.2e-6, + "output_cost_per_token": 0.0000022, "source": "https://www.together.ai/models/glm-4-6", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20849,11 +23698,11 @@ "supports_tool_choice": true }, "together_ai/moonshotai/Kimi-K2-Instruct-0905": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "together_ai", "max_input_tokens": 262144, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "source": "https://www.together.ai/models/kimi-k2-0905", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20864,7 +23713,7 @@ "litellm_provider": "together_ai", "max_input_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://www.together.ai/models/qwen3-next-80b-a3b-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -20875,23 +23724,27 @@ "litellm_provider": "together_ai", "max_input_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": 
"https://www.together.ai/models/qwen3-next-80b-a3b-thinking", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, "tts-1": { - "input_cost_per_character": 1.5e-5, + "input_cost_per_character": 0.000015, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": ["/v1/audio/speech"] + "supported_endpoints": [ + "/v1/audio/speech" + ] }, "tts-1-hd": { - "input_cost_per_character": 3e-5, + "input_cost_per_character": 0.00003, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": ["/v1/audio/speech"] + "supported_endpoints": [ + "/v1/audio/speech" + ] }, "us.amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -20920,13 +23773,13 @@ "supports_response_schema": true }, "us.amazon.nova-premier-v1:0": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 1.25e-5, + "output_cost_per_token": 0.0000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": false, @@ -20940,7 +23793,7 @@ "max_output_tokens": 10000, "max_tokens": 10000, "mode": "chat", - "output_cost_per_token": 3.2e-6, + "output_cost_per_token": 0.0000032, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -20948,7 +23801,7 @@ "supports_vision": true }, "us.anthropic.claude-3-5-haiku-20241022-v1:0": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 8e-7, "litellm_provider": "bedrock", @@ -20956,7 +23809,7 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ 
-20965,15 +23818,15 @@ "supports_tool_choice": true }, "us.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, "supports_computer_use": true, @@ -20987,13 +23840,13 @@ "tool_use_system_prompt_tokens": 346 }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -21001,15 +23854,15 @@ "supports_vision": true }, "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -21020,15 +23873,15 @@ "supports_vision": true }, "us.anthropic.claude-3-7-sonnet-20250219-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, 
- "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -21046,7 +23899,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -21054,26 +23907,26 @@ "supports_vision": true }, "us.anthropic.claude-3-opus-20240229-v1:0": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true }, "us.anthropic.claude-3-sonnet-20240229-v1:0": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_pdf_input": true, "supports_response_schema": true, @@ -21081,15 +23934,15 @@ "supports_vision": true }, "us.anthropic.claude-opus-4-1-20250805-v1:0": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, 
"mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -21107,19 +23960,19 @@ "tool_use_system_prompt_tokens": 159 }, "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 4.125e-6, + "cache_creation_input_token_cost": 0.000004125, "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 3.3e-6, - "input_cost_per_token_above_200k_tokens": 6.6e-6, - "output_cost_per_token_above_200k_tokens": 2.475e-5, - "cache_creation_input_token_cost_above_200k_tokens": 8.25e-6, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.65e-5, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -21137,15 +23990,15 @@ "tool_use_system_prompt_tokens": 346 }, "au.anthropic.claude-haiku-4-5-20251001-v1:0": { - "cache_creation_input_token_cost": 1.375e-6, + "cache_creation_input_token_cost": 0.000001375, "cache_read_input_token_cost": 1.1e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 5.5e-6, + "output_cost_per_token": 0.0000055, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -21158,15 +24011,67 @@ "tool_use_system_prompt_tokens": 346 }, "us.anthropic.claude-opus-4-20250514-v1:0": { - 
"cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "global.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": 
"bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -21184,19 +24089,19 @@ "tool_use_system_prompt_tokens": 159 }, "us.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -21214,25 +24119,25 @@ "tool_use_system_prompt_tokens": 159 }, "us.deepseek.r1-v1:0": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "supports_function_calling": false, "supports_reasoning": true, "supports_tool_choice": false }, "us.meta.llama3-1-405b-instruct-v1:0": { - "input_cost_per_token": 5.32e-6, + "input_cost_per_token": 0.00000532, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - 
"output_cost_per_token": 1.6e-5, + "output_cost_per_token": 0.000016, "supports_function_calling": true, "supports_tool_choice": false }, @@ -21293,13 +24198,13 @@ "supports_tool_choice": false }, "us.meta.llama3-2-90b-instruct-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": false, "supports_vision": true @@ -21325,8 +24230,14 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -21340,30 +24251,36 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, "us.mistral.pixtral-large-2502-v1:0": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": false }, "v0/v0-1.0-md": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "v0", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + 
"output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, @@ -21371,13 +24288,13 @@ "supports_vision": true }, "v0/v0-1.5-lg": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "v0", "max_input_tokens": 512000, "max_output_tokens": 512000, "max_tokens": 512000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, @@ -21385,13 +24302,13 @@ "supports_vision": true }, "v0/v0-1.5-md": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "v0", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, @@ -21441,7 +24358,7 @@ "max_output_tokens": 66536, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.6e-6 + "output_cost_per_token": 0.0000016 }, "vercel_ai_gateway/amazon/nova-lite": { "input_cost_per_token": 6e-8, @@ -21468,7 +24385,7 @@ "max_output_tokens": 8192, "max_tokens": 300000, "mode": "chat", - "output_cost_per_token": 3.2e-6 + "output_cost_per_token": 0.0000032 }, "vercel_ai_gateway/amazon/titan-embed-text-v2": { "input_cost_per_token": 2e-8, @@ -21477,7 +24394,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/anthropic/claude-3-haiku": { "cache_creation_input_token_cost": 3e-7, @@ -21488,21 +24405,21 @@ "max_output_tokens": 4096, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.25e-6 + "output_cost_per_token": 0.00000125 }, "vercel_ai_gateway/anthropic/claude-3-opus": { - 
"cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 7.5e-5 + "output_cost_per_token": 0.000075 }, "vercel_ai_gateway/anthropic/claude-3.5-haiku": { - "cache_creation_input_token_cost": 1e-6, + "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 8e-7, "litellm_provider": "vercel_ai_gateway", @@ -21510,60 +24427,60 @@ "max_output_tokens": 8192, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 4e-6 + "output_cost_per_token": 0.000004 }, "vercel_ai_gateway/anthropic/claude-3.5-sonnet": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/anthropic/claude-3.7-sonnet": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/anthropic/claude-4-opus": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 
0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 7.5e-5 + "output_cost_per_token": 0.000075 }, "vercel_ai_gateway/anthropic/claude-4-sonnet": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/cohere/command-a": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 256000, "max_output_tokens": 8000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/cohere/command-r": { "input_cost_per_token": 1.5e-7, @@ -21575,13 +24492,13 @@ "output_cost_per_token": 6e-7 }, "vercel_ai_gateway/cohere/command-r-plus": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/cohere/embed-v4.0": { "input_cost_per_token": 1.2e-7, @@ -21590,7 +24507,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/deepseek/deepseek-r1": { "input_cost_per_token": 5.5e-7, @@ -21599,7 +24516,7 @@ "max_output_tokens": 8192, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.19e-6 + "output_cost_per_token": 0.00000219 }, 
"vercel_ai_gateway/deepseek/deepseek-r1-distill-llama-70b": { "input_cost_per_token": 7.5e-7, @@ -21644,16 +24561,16 @@ "max_output_tokens": 65536, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 2.5e-6 + "output_cost_per_token": 0.0000025 }, "vercel_ai_gateway/google/gemini-2.5-pro": { - "input_cost_per_token": 2.5e-6, + "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, "max_output_tokens": 65536, "max_tokens": 1048576, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/google/gemini-embedding-001": { "input_cost_per_token": 1.5e-7, @@ -21662,7 +24579,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/google/gemma-2-9b": { "input_cost_per_token": 2e-7, @@ -21680,7 +24597,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/google/text-multilingual-embedding-002": { "input_cost_per_token": 2.5e-8, @@ -21689,7 +24606,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/inception/mercury-coder-small": { "input_cost_per_token": 2.5e-7, @@ -21698,7 +24615,7 @@ "max_output_tokens": 16384, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "vercel_ai_gateway/meta/llama-3-70b": { "input_cost_per_token": 5.9e-7, @@ -21815,7 +24732,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/mistral/devstral-small": { "input_cost_per_token": 7e-8, @@ -21827,13 +24744,13 @@ "output_cost_per_token": 2.8e-7 }, "vercel_ai_gateway/mistral/magistral-medium": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, 
"litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 64000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 5e-6 + "output_cost_per_token": 0.000005 }, "vercel_ai_gateway/mistral/magistral-small": { "input_cost_per_token": 5e-7, @@ -21842,7 +24759,7 @@ "max_output_tokens": 64000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "vercel_ai_gateway/mistral/ministral-3b": { "input_cost_per_token": 4e-8, @@ -21869,16 +24786,16 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "chat", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/mistral/mistral-large": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32000, "max_output_tokens": 4000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 6e-6 + "output_cost_per_token": 0.000006 }, "vercel_ai_gateway/mistral/mistral-saba-24b": { "input_cost_per_token": 7.9e-7, @@ -21899,13 +24816,13 @@ "output_cost_per_token": 3e-7 }, "vercel_ai_gateway/mistral/mixtral-8x22b-instruct": { - "input_cost_per_token": 1.2e-6, + "input_cost_per_token": 0.0000012, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 65536, "max_output_tokens": 2048, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.2e-6 + "output_cost_per_token": 0.0000012 }, "vercel_ai_gateway/mistral/pixtral-12b": { "input_cost_per_token": 1.5e-7, @@ -21917,13 +24834,13 @@ "output_cost_per_token": 1.5e-7 }, "vercel_ai_gateway/mistral/pixtral-large": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 6e-6 + "output_cost_per_token": 0.000006 }, "vercel_ai_gateway/moonshotai/kimi-k2": { "input_cost_per_token": 5.5e-7, @@ -21932,7 
+24849,7 @@ "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.2e-6 + "output_cost_per_token": 0.0000022 }, "vercel_ai_gateway/morph/morph-v3-fast": { "input_cost_per_token": 8e-7, @@ -21941,7 +24858,7 @@ "max_output_tokens": 16384, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.2e-6 + "output_cost_per_token": 0.0000012 }, "vercel_ai_gateway/morph/morph-v3-large": { "input_cost_per_token": 9e-7, @@ -21950,7 +24867,7 @@ "max_output_tokens": 16384, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1.9e-6 + "output_cost_per_token": 0.0000019 }, "vercel_ai_gateway/openai/gpt-3.5-turbo": { "input_cost_per_token": 5e-7, @@ -21959,39 +24876,39 @@ "max_output_tokens": 4096, "max_tokens": 16385, "mode": "chat", - "output_cost_per_token": 1.5e-6 + "output_cost_per_token": 0.0000015 }, "vercel_ai_gateway/openai/gpt-3.5-turbo-instruct": { - "input_cost_per_token": 1.5e-6, + "input_cost_per_token": 0.0000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 8192, "max_output_tokens": 4096, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-6 + "output_cost_per_token": 0.000002 }, "vercel_ai_gateway/openai/gpt-4-turbo": { - "input_cost_per_token": 1e-5, + "input_cost_per_token": 0.00001, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 3e-5 + "output_cost_per_token": 0.00003 }, "vercel_ai_gateway/openai/gpt-4.1": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1047576, "max_output_tokens": 32768, "max_tokens": 1047576, "mode": "chat", - "output_cost_per_token": 8e-6 + "output_cost_per_token": 0.000008 }, "vercel_ai_gateway/openai/gpt-4.1-mini": { - 
"cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 4e-7, "litellm_provider": "vercel_ai_gateway", @@ -21999,10 +24916,10 @@ "max_output_tokens": 32768, "max_tokens": 1047576, "mode": "chat", - "output_cost_per_token": 1.6e-6 + "output_cost_per_token": 0.0000016 }, "vercel_ai_gateway/openai/gpt-4.1-nano": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 2.5e-8, "input_cost_per_token": 1e-7, "litellm_provider": "vercel_ai_gateway", @@ -22013,18 +24930,18 @@ "output_cost_per_token": 4e-7 }, "vercel_ai_gateway/openai/gpt-4o": { - "cache_creation_input_token_cost": 0.0, - "cache_read_input_token_cost": 1.25e-6, - "input_cost_per_token": 2.5e-6, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/openai/gpt-4o-mini": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 7.5e-8, "input_cost_per_token": 1.5e-7, "litellm_provider": "vercel_ai_gateway", @@ -22035,48 +24952,48 @@ "output_cost_per_token": 6e-7 }, "vercel_ai_gateway/openai/o1": { - "cache_creation_input_token_cost": 0.0, - "cache_read_input_token_cost": 7.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 6e-5 + "output_cost_per_token": 0.00006 }, "vercel_ai_gateway/openai/o3": { - "cache_creation_input_token_cost": 0.0, + 
"cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 8e-6 + "output_cost_per_token": 0.000008 }, "vercel_ai_gateway/openai/o3-mini": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 4.4e-6 + "output_cost_per_token": 0.0000044 }, "vercel_ai_gateway/openai/o4-mini": { - "cache_creation_input_token_cost": 0.0, + "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 2.75e-7, - "input_cost_per_token": 1.1e-6, + "input_cost_per_token": 0.0000011, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 4.4e-6 + "output_cost_per_token": 0.0000044 }, "vercel_ai_gateway/openai/text-embedding-3-large": { "input_cost_per_token": 1.3e-7, @@ -22085,7 +25002,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/openai/text-embedding-3-small": { "input_cost_per_token": 2e-8, @@ -22094,7 +25011,7 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/openai/text-embedding-ada-002": { "input_cost_per_token": 1e-7, @@ -22103,97 +25020,97 @@ "max_output_tokens": 0, "max_tokens": 0, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "vercel_ai_gateway/perplexity/sonar": { - "input_cost_per_token": 
1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, "max_tokens": 127000, "mode": "chat", - "output_cost_per_token": 1e-6 + "output_cost_per_token": 0.000001 }, "vercel_ai_gateway/perplexity/sonar-pro": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 8000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/perplexity/sonar-reasoning": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, "max_tokens": 127000, "mode": "chat", - "output_cost_per_token": 5e-6 + "output_cost_per_token": 0.000005 }, "vercel_ai_gateway/perplexity/sonar-reasoning-pro": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, "max_tokens": 127000, "mode": "chat", - "output_cost_per_token": 8e-6 + "output_cost_per_token": 0.000008 }, "vercel_ai_gateway/vercel/v0-1.0-md": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 32000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/vercel/v0-1.5-md": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 32768, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/xai/grok-2": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", 
"max_input_tokens": 131072, "max_output_tokens": 4000, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/xai/grok-2-vision": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1e-5 + "output_cost_per_token": 0.00001 }, "vercel_ai_gateway/xai/grok-3": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/xai/grok-3-fast": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-5 + "output_cost_per_token": 0.000025 }, "vercel_ai_gateway/xai/grok-3-mini": { "input_cost_per_token": 3e-7, @@ -22211,16 +25128,16 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6 + "output_cost_per_token": 0.000004 }, "vercel_ai_gateway/xai/grok-4": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5 + "output_cost_per_token": 0.000015 }, "vercel_ai_gateway/zai/glm-4.5": { "input_cost_per_token": 6e-7, @@ -22229,7 +25146,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.2e-6 + "output_cost_per_token": 0.0000022 }, "vercel_ai_gateway/zai/glm-4.5-air": { "input_cost_per_token": 2e-7, @@ -22238,7 +25155,7 @@ "max_output_tokens": 96000, 
"max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.1e-6 + "output_cost_per_token": 0.0000011 }, "vercel_ai_gateway/zai/glm-4.6": { "litellm_provider": "vercel_ai_gateway", @@ -22248,48 +25165,57 @@ "max_output_tokens": 200000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.8e-6, + "output_cost_per_token": 0.0000018, "source": "https://vercel.com/ai-gateway/models/glm-4.6", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true }, + "vertex_ai/chirp": { + "input_cost_per_character": 0.00003, + "litellm_provider": "vertex_ai", + "mode": "audio_speech", + "source": "https://cloud.google.com/text-to-speech/pricing", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, "vertex_ai/claude-3-5-haiku": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_tool_choice": true }, "vertex_ai/claude-3-5-haiku@20241022": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_tool_choice": true }, "vertex_ai/claude-haiku-4-5@20251001": { - "cache_creation_input_token_cost": 1.25e-6, + "cache_creation_input_token_cost": 0.00000125, "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 
8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-6, + "output_cost_per_token": 0.000005, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/haiku-4-5", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -22300,13 +25226,13 @@ "supports_tool_choice": true }, "vertex_ai/claude-3-5-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22315,13 +25241,13 @@ "supports_vision": true }, "vertex_ai/claude-3-5-sonnet-v2": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22330,13 +25256,13 @@ "supports_vision": true }, "vertex_ai/claude-3-5-sonnet-v2@20241022": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22345,13 +25271,13 @@ "supports_vision": true }, "vertex_ai/claude-3-5-sonnet@20240620": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, 
"mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -22359,16 +25285,16 @@ "supports_vision": true }, "vertex_ai/claude-3-7-sonnet@20250219": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "deprecation_date": "2025-06-01", - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22387,7 +25313,7 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, @@ -22400,74 +25326,74 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.25e-6, + "output_cost_per_token": 0.00000125, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-3-opus": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-3-opus@20240229": { - "input_cost_per_token": 1.5e-5, + "input_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", 
"max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-3-sonnet": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-3-sonnet@20240229": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-opus-4": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -22485,55 +25411,107 @@ "tool_use_system_prompt_tokens": 159 }, "vertex_ai/claude-opus-4-1": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, - 
"input_cost_per_token_batches": 7.5e-6, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, - "output_cost_per_token_batches": 3.75e-5, + "output_cost_per_token": 0.000075, + "output_cost_per_token_batches": 0.0000375, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/claude-opus-4-1@20250805": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, - "input_cost_per_token_batches": 7.5e-6, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, - "output_cost_per_token_batches": 3.75e-5, + "output_cost_per_token": 0.000075, + "output_cost_per_token_batches": 0.0000375, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true }, + "vertex_ai/claude-opus-4-5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + 
"supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-opus-4-5@20251101": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "vertex_ai/claude-sonnet-4-5": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token_batches": 1.5e-6, + "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, 
"mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_batches": 7.5e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_batches": 0.0000075, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22545,21 +25523,21 @@ "supports_vision": true }, "vertex_ai/claude-sonnet-4-5@20250929": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token_batches": 1.5e-6, + "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_batches": 7.5e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_batches": 0.0000075, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -22571,15 +25549,15 @@ "supports_vision": true }, "vertex_ai/claude-opus-4@20250514": { - "cache_creation_input_token_cost": 1.875e-5, - "cache_read_input_token_cost": 1.5e-6, - "input_cost_per_token": 1.5e-5, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 7.5e-5, + 
"output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -22597,19 +25575,19 @@ "tool_use_system_prompt_tokens": 159 }, "vertex_ai/claude-sonnet-4": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -22627,19 +25605,19 @@ "tool_use_system_prompt_tokens": 159 }, "vertex_ai/claude-sonnet-4@20250514": { - "cache_creation_input_token_cost": 3.75e-6, + "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_200k_tokens": 6e-6, - "output_cost_per_token_above_200k_tokens": 2.25e-5, - "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 1000000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - 
"output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -22734,15 +25712,17 @@ "supports_tool_choice": true }, "vertex_ai/deepseek-ai/deepseek-v3.1-maas": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "vertex_ai-deepseek_models", "max_input_tokens": 163840, "max_output_tokens": 32768, "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_regions": ["us-west2"], + "supported_regions": [ + "us-west2" + ], "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -22750,13 +25730,13 @@ "supports_tool_choice": true }, "vertex_ai/deepseek-ai/deepseek-r1-0528-maas": { - "input_cost_per_token": 1.35e-6, + "input_cost_per_token": 0.00000135, "litellm_provider": "vertex_ai-deepseek_models", "max_input_tokens": 65336, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5.4e-6, + "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_assistant_prefill": true, "supports_function_calling": true, @@ -22766,7 +25746,7 @@ }, "vertex_ai/gemini-2.5-flash-image": { "cache_read_input_token_cost": 3e-8, - "input_cost_per_audio_token": 1e-6, + "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -22780,13 +25760,25 @@ "max_videos_per_prompt": 10, "mode": "image_generation", "output_cost_per_image": 0.039, - "output_cost_per_reasoning_token": 2.5e-6, - "output_cost_per_token": 2.5e-6, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, "rpm": 100000, "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation#edit-an-image", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -22802,16 +25794,17 @@ }, "vertex_ai/gemini-3-pro-image-preview": { "input_cost_per_image": 0.0011, - "input_cost_per_token": 2e-6, - "input_cost_per_token_batches": 1e-6, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 65536, "max_output_tokens": 32768, "max_tokens": 65536, "mode": "image_generation", "output_cost_per_image": 0.134, - "output_cost_per_token": 1.2e-5, - "output_cost_per_token_batches": 6e-6, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000006, "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image" }, "vertex_ai/imagegeneration@006": { @@ -22873,23 +25866,23 @@ "supports_tool_choice": true }, "vertex_ai/jamba-1.5-large": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-ai21_models", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "vertex_ai/jamba-1.5-large@001": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-ai21_models", "max_input_tokens": 256000, 
"max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 8e-6, + "output_cost_per_token": 0.000008, "supports_tool_choice": true }, "vertex_ai/jamba-1.5-mini": { @@ -22913,33 +25906,33 @@ "supports_tool_choice": true }, "vertex_ai/meta/llama-3.1-405b-instruct-maas": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.6e-5, + "output_cost_per_token": 0.000016, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/meta/llama-3.1-70b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/meta/llama-3.1-8b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, @@ -22948,14 +25941,14 @@ "notes": "VertexAI states that The Llama 3.1 API service for llama-3.1-70b-instruct-maas and llama-3.1-8b-instruct-maas are in public preview and at no cost." 
}, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true }, "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, @@ -22964,7 +25957,7 @@ "notes": "VertexAI states that The Llama 3.2 API service is at no cost during public preview, and will be priced as per dollar-per-1M-tokens at GA." }, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", "supports_system_messages": true, "supports_tool_choice": true, @@ -22977,10 +25970,16 @@ "max_output_tokens": 1000000, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.15e-6, + "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -22991,10 +25990,16 @@ "max_output_tokens": 1000000, "max_tokens": 1000000, "mode": "chat", - "output_cost_per_token": 1.15e-6, + "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": 
true }, @@ -23007,8 +26012,14 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -23021,41 +26032,47 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/meta/llama3-405b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 32000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_tool_choice": true }, "vertex_ai/meta/llama3-70b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 32000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_tool_choice": true }, "vertex_ai/meta/llama3-8b-instruct-maas": { - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 32000, "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.0, + 
"output_cost_per_token": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_tool_choice": true }, @@ -23066,7 +26083,7 @@ "max_output_tokens": 196608, "max_tokens": 196608, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_function_calling": true, "supports_tool_choice": true @@ -23078,7 +26095,7 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 2.5e-6, + "output_cost_per_token": 0.0000025, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_function_calling": true, "supports_tool_choice": true, @@ -23091,7 +26108,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": true }, @@ -23102,7 +26119,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": true }, @@ -23113,7 +26130,7 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": true }, @@ -23124,62 +26141,62 @@ "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 2e-6, + "output_cost_per_token": 0.000002, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-large-2411": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, 
"supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-large@2407": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-large@2411-001": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-large@latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 6e-6, + "output_cost_per_token": 0.000006, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-nemo@2407": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true }, @@ -23195,33 +26212,35 @@ "supports_tool_choice": true }, "vertex_ai/mistral-small-2503": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, 
"supports_tool_choice": true, "supports_vision": true }, "vertex_ai/mistral-small-2503@001": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 32000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", - "output_cost_per_token": 3e-6, + "output_cost_per_token": 0.000003, "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/mistral-ocr-2505": { "litellm_provider": "vertex_ai", "mode": "ocr", - "ocr_cost_per_page": 5e-4, - "supported_endpoints": ["/v1/ocr"], + "ocr_cost_per_page": 0.0005, + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://cloud.google.com/generative-ai-app-builder/pricing" }, "vertex_ai/openai/gpt-oss-120b-maas": { @@ -23253,19 +26272,19 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 1e-6, + "output_cost_per_token": 0.000001, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_tool_choice": true }, "vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas": { - "input_cost_per_token": 1e-6, + "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-qwen_models", "max_input_tokens": 262144, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -23277,7 +26296,7 @@ "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -23289,7 +26308,7 @@ "max_output_tokens": 262144, "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.2e-6, + "output_cost_per_token": 0.0000012, 
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, "supports_tool_choice": true @@ -23301,8 +26320,12 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.0-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -23311,8 +26334,12 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.0-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -23321,8 +26348,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.0-fast-generate-001": { "litellm_provider": "vertex_ai-video-models", @@ -23331,8 +26362,12 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.0-generate-001": { "litellm_provider": "vertex_ai-video-models", @@ -23341,8 +26376,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + 
], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.1-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -23351,8 +26390,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.1-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -23361,8 +26404,12 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "voyage/rerank-2": { "input_cost_per_query": 5e-8, @@ -23373,7 +26420,7 @@ "max_query_tokens": 16000, "max_tokens": 16000, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/rerank-2-lite": { "input_cost_per_query": 2e-8, @@ -23384,7 +26431,7 @@ "max_query_tokens": 8000, "max_tokens": 8000, "mode": "rerank", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-2": { "input_cost_per_token": 1e-7, @@ -23392,7 +26439,7 @@ "max_input_tokens": 4000, "max_tokens": 4000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3": { "input_cost_per_token": 6e-8, @@ -23400,7 +26447,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3-large": { "input_cost_per_token": 1.8e-7, @@ -23408,7 +26455,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3-lite": { 
"input_cost_per_token": 2e-8, @@ -23416,7 +26463,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3.5": { "input_cost_per_token": 6e-8, @@ -23424,7 +26471,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-3.5-lite": { "input_cost_per_token": 2e-8, @@ -23432,7 +26479,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-code-2": { "input_cost_per_token": 1.2e-7, @@ -23440,7 +26487,7 @@ "max_input_tokens": 16000, "max_tokens": 16000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-code-3": { "input_cost_per_token": 1.8e-7, @@ -23448,7 +26495,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-context-3": { "input_cost_per_token": 1.8e-7, @@ -23456,7 +26503,7 @@ "max_input_tokens": 120000, "max_tokens": 120000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-finance-2": { "input_cost_per_token": 1.2e-7, @@ -23464,7 +26511,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-large-2": { "input_cost_per_token": 1.2e-7, @@ -23472,7 +26519,7 @@ "max_input_tokens": 16000, "max_tokens": 16000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-law-2": { "input_cost_per_token": 1.2e-7, @@ -23480,7 +26527,7 @@ "max_input_tokens": 16000, "max_tokens": 16000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-lite-01": { "input_cost_per_token": 1e-7, @@ -23488,7 +26535,7 @@ 
"max_input_tokens": 4096, "max_tokens": 4096, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-lite-02-instruct": { "input_cost_per_token": 1e-7, @@ -23496,7 +26543,7 @@ "max_input_tokens": 4000, "max_tokens": 4000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "voyage/voyage-multimodal-3": { "input_cost_per_token": 1.2e-7, @@ -23504,7 +26551,7 @@ "max_input_tokens": 32000, "max_tokens": 32000, "mode": "embedding", - "output_cost_per_token": 0.0 + "output_cost_per_token": 0 }, "wandb/openai/gpt-oss-120b": { "max_tokens": 131072, @@ -23564,8 +26611,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.135, - "output_cost_per_token": 0.4, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000025, "litellm_provider": "wandb", "mode": "chat" }, @@ -23633,13 +26680,13 @@ "mode": "chat" }, "watsonx/ibm/granite-3-8b-instruct": { - "input_cost_per_token": 0.2e-6, + "input_cost_per_token": 2e-7, "litellm_provider": "watsonx", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.2e-6, + "output_cost_per_token": 2e-7, "supports_audio_input": false, "supports_audio_output": false, "supports_function_calling": true, @@ -23651,13 +26698,13 @@ "supports_vision": false }, "watsonx/mistralai/mistral-large": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "watsonx", "max_input_tokens": 131072, "max_output_tokens": 16384, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 10e-6, + "output_cost_per_token": 0.00001, "supports_audio_input": false, "supports_audio_output": false, "supports_function_calling": true, @@ -23696,8 +26743,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.6e-6, - "output_cost_per_token": 0.6e-6, + "input_cost_per_token": 6e-7, + 
"output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23708,8 +26755,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.6e-6, - "output_cost_per_token": 0.6e-6, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23720,8 +26767,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.6e-6, - "output_cost_per_token": 0.6e-6, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23732,8 +26779,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.2e-6, - "output_cost_per_token": 0.2e-6, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23744,8 +26791,8 @@ "max_tokens": 20480, "max_input_tokens": 20480, "max_output_tokens": 20480, - "input_cost_per_token": 0.06e-6, - "output_cost_per_token": 0.25e-6, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23756,8 +26803,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.1e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23768,8 +26815,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.2e-6, - "output_cost_per_token": 0.2e-6, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": 
false, @@ -23780,8 +26827,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.38e-6, - "output_cost_per_token": 0.38e-6, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23792,8 +26839,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.38e-6, - "output_cost_per_token": 0.38e-6, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23804,8 +26851,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.38e-6, - "output_cost_per_token": 0.38e-6, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23816,8 +26863,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.1e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23828,8 +26875,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.35e-6, - "output_cost_per_token": 0.35e-6, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23840,8 +26887,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.1e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23852,8 +26899,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, 
"max_output_tokens": 128000, - "input_cost_per_token": 0.15e-6, - "output_cost_per_token": 0.15e-6, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23864,8 +26911,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-6, - "output_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23876,8 +26923,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.71e-6, - "output_cost_per_token": 0.71e-6, + "input_cost_per_token": 7.1e-7, + "output_cost_per_token": 7.1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23888,8 +26935,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.35e-6, - "output_cost_per_token": 1.4e-6, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 0.0000014, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23900,8 +26947,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.35e-6, - "output_cost_per_token": 0.35e-6, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23912,8 +26959,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-6, - "output_cost_per_token": 10e-6, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.00001, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23924,8 +26971,8 @@ "max_tokens": 32000, "max_input_tokens": 32000, "max_output_tokens": 32000, - 
"input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.3e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23936,8 +26983,8 @@ "max_tokens": 32000, "max_input_tokens": 32000, "max_output_tokens": 32000, - "input_cost_per_token": 0.1e-6, - "output_cost_per_token": 0.3e-6, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -23948,8 +26995,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.35e-6, - "output_cost_per_token": 0.35e-6, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23960,8 +27007,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.15e-6, - "output_cost_per_token": 0.6e-6, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -23972,107 +27019,118 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1.8e-6, - "output_cost_per_token": 1.8e-6, + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.0000018, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, "supports_parallel_function_calling": false, "supports_vision": false }, + "watsonx/whisper-large-v3-turbo": { + "input_cost_per_second": 0.0001, + "output_cost_per_second": 0.0001, + "litellm_provider": "watsonx", + "mode": "audio_transcription", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, "whisper-1": { "input_cost_per_second": 0.0001, "litellm_provider": "openai", "mode": "audio_transcription", "output_cost_per_second": 0.0001, - "supported_endpoints": 
["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "xai/grok-2": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-2-1212": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-2-latest": { - "input_cost_per_token": 2e-6, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-2-vision": { - "input_cost_per_image": 2e-6, - "input_cost_per_token": 2e-6, + "input_cost_per_image": 0.000002, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true }, "xai/grok-2-vision-1212": { - "input_cost_per_image": 2e-6, - "input_cost_per_token": 2e-6, + "input_cost_per_image": 0.000002, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 
1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true }, "xai/grok-2-vision-latest": { - "input_cost_per_image": 2e-6, - "input_cost_per_token": 2e-6, + "input_cost_per_image": 0.000002, + "input_cost_per_token": 0.000002, "litellm_provider": "xai", "max_input_tokens": 32768, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 1e-5, + "output_cost_per_token": 0.00001, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true }, "xai/grok-3": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24080,13 +27138,13 @@ "supports_web_search": true }, "xai/grok-3-beta": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24094,13 +27152,13 @@ "supports_web_search": true }, "xai/grok-3-fast-beta": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-5, + "output_cost_per_token": 0.000025, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24108,13 +27166,13 @@ "supports_web_search": true }, 
"xai/grok-3-fast-latest": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-5, + "output_cost_per_token": 0.000025, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24122,13 +27180,13 @@ "supports_web_search": true }, "xai/grok-3-latest": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_response_schema": false, @@ -24172,7 +27230,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_reasoning": true, @@ -24187,7 +27245,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_reasoning": true, @@ -24202,7 +27260,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 4e-6, + "output_cost_per_token": 0.000004, "source": "https://x.ai/api#pricing", "supports_function_calling": true, "supports_reasoning": true, @@ -24226,13 +27284,13 @@ "supports_web_search": true }, "xai/grok-4": { - "input_cost_per_token": 3e-6, + "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "source": 
"https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, @@ -24240,15 +27298,15 @@ }, "xai/grok-4-fast-reasoning": { "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, - "mode": "chat", - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, - "cache_read_input_token_cost": 0.05e-6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, + "cache_read_input_token_cost": 5e-8, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, @@ -24256,61 +27314,61 @@ }, "xai/grok-4-fast-non-reasoning": { "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "cache_read_input_token_cost": 0.05e-6, - "max_tokens": 2e6, - "mode": "chat", - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "cache_read_input_token_cost": 5e-8, + "max_tokens": 2000000, + "mode": "chat", + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-4-0709": { - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_128k_tokens": 6e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_128k_tokens": 0.000006, "litellm_provider": "xai", 
"max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_above_128k_tokens": 30e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_128k_tokens": 0.00003, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-4-latest": { - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_128k_tokens": 6e-6, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_128k_tokens": 0.000006, "litellm_provider": "xai", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_above_128k_tokens": 30e-6, + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_128k_tokens": 0.00003, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-4-1-fast": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24321,16 +27379,16 @@ "supports_web_search": true }, "xai/grok-4-1-fast-reasoning": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - 
"input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24341,16 +27399,16 @@ "supports_web_search": true }, "xai/grok-4-1-fast-reasoning-latest": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24361,16 +27419,16 @@ "supports_web_search": true }, "xai/grok-4-1-fast-non-reasoning": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + 
"max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24380,16 +27438,16 @@ "supports_web_search": true }, "xai/grok-4-1-fast-non-reasoning-latest": { - "cache_read_input_token_cost": 0.05e-6, - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "litellm_provider": "xai", - "max_input_tokens": 2e6, - "max_output_tokens": 2e6, - "max_tokens": 2e6, + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, "mode": "chat", - "output_cost_per_token": 0.5e-6, - "output_cost_per_token_above_128k_tokens": 1e-6, + "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", "supports_audio_input": true, "supports_function_calling": true, @@ -24399,13 +27457,13 @@ "supports_web_search": true }, "xai/grok-beta": { - "input_cost_per_token": 5e-6, + "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, @@ -24419,7 +27477,7 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_reasoning": true, @@ -24433,7 +27491,7 
@@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_reasoning": true, @@ -24447,28 +27505,117 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 1.5e-6, + "output_cost_per_token": 0.0000015, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true }, "xai/grok-vision-beta": { - "input_cost_per_image": 5e-6, - "input_cost_per_token": 5e-6, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-5, + "output_cost_per_token": 0.000015, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true }, + "zai/glm-4.6": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000022, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000022, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5v": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000018, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": 
"https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-x": { + "input_cost_per_token": 0.0000022, + "output_cost_per_token": 0.0000089, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-air": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000011, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-airx": { + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000045, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4-32b-0414-128k": { + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-flash": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, "vertex_ai/search_api": { - "input_cost_per_query": 1.5e-3, + "input_cost_per_query": 0.0015, "litellm_provider": "vertex_ai", "mode": "vector_store" }, @@ -24482,54 +27629,99 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.1, "source": 
"https://platform.openai.com/docs/api-reference/videos", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["720x1280", "1280x720"] + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] }, "openai/sora-2-pro": { "litellm_provider": "openai", "mode": "video_generation", "output_cost_per_video_per_second": 0.3, "source": "https://platform.openai.com/docs/api-reference/videos", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["720x1280", "1280x720"] + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] }, "azure/sora-2": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.1, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["720x1280", "1280x720"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] }, "azure/sora-2-pro": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.3, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["720x1280", "1280x720"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] }, "azure/sora-2-pro-high-res": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.5, "source": 
"https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["1024x1792", "1792x1024"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1024x1792", + "1792x1024" + ] }, "runwayml/gen4_turbo": { "litellm_provider": "runwayml", "mode": "video_generation", "output_cost_per_video_per_second": 0.05, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["1280x720", "720x1280"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1280x720", + "720x1280" + ], "metadata": { "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" } @@ -24539,9 +27731,17 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.15, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["1280x720", "720x1280"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1280x720", + "720x1280" + ], "metadata": { "comment": "15 credits per second @ $0.01 per credit = $0.15 per second" } @@ -24551,9 +27751,17 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.05, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["1280x720", "720x1280"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1280x720", + "720x1280" + ], "metadata": { "comment": "5 credits per 
second @ $0.01 per credit = $0.05 per second" } @@ -24564,9 +27772,17 @@ "input_cost_per_image": 0.05, "output_cost_per_image": 0.05, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["image"], - "supported_resolutions": ["1280x720", "1920x1080"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "image" + ], + "supported_resolutions": [ + "1280x720", + "1920x1080" + ], "metadata": { "comment": "5 credits per 720p image or 8 credits per 1080p image @ $0.01 per credit. Using 5 credits ($0.05) as base cost" } @@ -24577,9 +27793,17 @@ "input_cost_per_image": 0.02, "output_cost_per_image": 0.02, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["image"], - "supported_resolutions": ["1280x720", "1920x1080"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "image" + ], + "supported_resolutions": [ + "1280x720", + "1920x1080" + ], "metadata": { "comment": "2 credits per image (any resolution) @ $0.01 per credit = $0.02 per image" } @@ -24593,4 +27817,4 @@ "comment": "Estimated cost based on standard TTS pricing. RunwayML uses ElevenLabs models." 
} } -} +} \ No newline at end of file From 81a449ebceced96945b2037397d4bc0f2dc795dc Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 6 Dec 2025 14:33:19 -0600 Subject: [PATCH 6/7] docs: note gpt-5.1-codex-max and xhigh support --- docs/models.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/models.md b/docs/models.md index b383c574ec..88a3011dc4 100644 --- a/docs/models.md +++ b/docs/models.md @@ -47,6 +47,7 @@ GPT-5 family of models: - `openai:gpt-5.1` - `openai:gpt-5-pro` - `openai:gpt-5.1-codex` +- `openai:gpt-5.1-codex-max` — supports the XHIGH (extra high) thinking level; aliases: `gpt-5.1-codex-max`, `codex-max` - `openai:gpt-5.1-codex-mini` #### Google (Cloud) From e1b7cbcdf8c24b104a49d69dbeaa781d386b4621 Mon Sep 17 00:00:00 2001 From: Ammar Date: Sat, 6 Dec 2025 14:38:17 -0600 Subject: [PATCH 7/7] chore: fmt thinking policy and models data --- src/browser/utils/thinking/policy.ts | 5 +- src/common/utils/tokens/models.json | 3631 ++++++-------------------- 2 files changed, 757 insertions(+), 2879 deletions(-) diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts index c81b420237..a39e34d056 100644 --- a/src/browser/utils/thinking/policy.ts +++ b/src/browser/utils/thinking/policy.ts @@ -38,10 +38,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { const withoutPrefix = normalized.replace(/^[a-z0-9_-]+:\s*/, ""); // GPT-5.1-Codex-Max supports 5 reasoning levels including xhigh (Extra High) - if ( - withoutPrefix.startsWith("gpt-5.1-codex-max") || - withoutPrefix.startsWith("codex-max") - ) { + if (withoutPrefix.startsWith("gpt-5.1-codex-max") || withoutPrefix.startsWith("codex-max")) { return ["off", "low", "medium", "high", "xhigh"]; } diff --git a/src/common/utils/tokens/models.json b/src/common/utils/tokens/models.json index b2d757ddf0..cb51542836 100644 --- a/src/common/utils/tokens/models.json +++ b/src/common/utils/tokens/models.json @@ -20,13 +20,7 @@ "search_context_size_low": 
0, "search_context_size_medium": 0 }, - "supported_regions": [ - "global", - "us-west-2", - "eu-west-1", - "ap-southeast-1", - "ap-northeast-1" - ], + "supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -145,9 +139,7 @@ "mode": "image_generation", "output_cost_per_image": 0.021, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/dall-e-3": { "litellm_provider": "aiml", @@ -157,9 +149,7 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro": { "litellm_provider": "aiml", @@ -169,25 +159,19 @@ "mode": "image_generation", "output_cost_per_image": 0.053, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro/v1.1": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.042, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro/v1.1-ultra": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.063, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-realism": { "litellm_provider": "aiml", @@ -197,9 +181,7 @@ "mode": "image_generation", "output_cost_per_image": 0.037, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/dev": { "litellm_provider": "aiml", @@ -209,9 +191,7 @@ "mode": "image_generation", "output_cost_per_image": 0.026, 
"source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/kontext-max/text-to-image": { "litellm_provider": "aiml", @@ -221,9 +201,7 @@ "mode": "image_generation", "output_cost_per_image": 0.084, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/kontext-pro/text-to-image": { "litellm_provider": "aiml", @@ -233,9 +211,7 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/schnell": { "litellm_provider": "aiml", @@ -245,9 +221,7 @@ "mode": "image_generation", "output_cost_per_image": 0.003, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "amazon.nova-canvas-v1:0": { "litellm_provider": "bedrock", @@ -1192,16 +1166,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -1284,16 +1251,9 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": 
true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -1407,14 +1367,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1431,18 +1385,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000011, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1463,18 +1408,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.0000022, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1495,19 +1431,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000011, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", 
"/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1528,19 +1454,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000011, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1561,16 +1477,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.000011, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1591,16 +1500,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.0000022, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1621,18 +1523,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4.4e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - 
"/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1795,19 +1688,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1828,19 +1711,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1861,16 +1734,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], 
"supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1891,16 +1757,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.000002, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2157,18 +2016,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2191,18 +2041,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2224,18 +2065,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" 
- ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2258,18 +2090,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2291,18 +2114,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2324,18 +2138,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + 
"supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2438,17 +2243,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2469,17 +2266,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 0.0000024, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2500,17 +2289,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2563,17 +2344,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text", 
- "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2616,18 +2389,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2648,18 +2412,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 0.0000024, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2675,9 +2430,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -2686,16 +2439,9 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/speech" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "audio" - ] + 
"supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] }, "azure/gpt-4o-realtime-preview-2024-10-01": { "cache_creation_input_audio_token_cost": 0.00002, @@ -2727,14 +2473,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00002, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2750,9 +2490,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-4o-transcribe-diarize": { "input_cost_per_audio_token": 0.000006, @@ -2762,9 +2500,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-5.1-2025-11-13": { "cache_read_input_token_cost": 1.25e-7, @@ -2778,18 +2514,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2814,18 +2541,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - 
"/v1/chat/completions", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -2849,16 +2567,9 @@ "mode": "responses", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2882,16 +2593,9 @@ "mode": "responses", "output_cost_per_token": 0.000002, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2912,18 +2616,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, 
"supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2944,18 +2639,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2977,18 +2663,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3009,18 +2686,9 @@ "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3041,16 +2709,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - 
"supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3071,18 +2732,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3103,18 +2755,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3135,18 +2778,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], 
"supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3167,18 +2801,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3199,16 +2824,9 @@ "mode": "responses", "output_cost_per_token": 0.00012, "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5", - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -3228,19 +2846,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3261,19 +2869,9 @@ "max_tokens": 
128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3294,16 +2892,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3324,16 +2915,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -3354,16 +2938,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.000002, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": 
true, "supports_parallel_function_calling": true, @@ -3380,9 +2957,7 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, @@ -3407,171 +2982,133 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1024/gpt-image-1": { 
"input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/gpt-image-1-mini": { "input_cost_per_pixel": 8.0566406e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0345052083e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, 
"litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 7.9752604167e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.1575520833e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/mistral-large-2402": { "input_cost_per_token": 0.000008, @@ -3693,18 +3230,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": 
["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3723,18 +3251,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3752,18 +3271,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -3814,18 +3324,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ 
-3844,18 +3345,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -3873,18 +3365,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3993,18 +3476,9 @@ "mode": "chat", "output_cost_per_token": 0.0000088, "output_cost_per_token_batches": 0.0000044, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4027,18 +3501,9 @@ "mode": "chat", "output_cost_per_token": 0.00000176, "output_cost_per_token_batches": 8.8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - 
"text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4061,18 +3526,9 @@ "mode": "chat", "output_cost_per_token": 4.4e-7, "output_cost_per_token_batches": 2.2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4181,14 +3637,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -4205,18 +3655,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000011, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4237,18 +3678,9 @@ "max_tokens": 128000, "mode": "chat", 
"output_cost_per_token": 0.0000022, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4269,18 +3701,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4.4e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4301,19 +3724,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000011, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4334,19 +3747,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000011, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": 
["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4367,16 +3770,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.000011, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4397,16 +3793,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.0000022, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -4473,18 +3862,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000088, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -4559,18 +3939,14 @@ "mode": "image_generation", "output_cost_per_image": 0.04, "source": 
"https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure_ai/FLUX.1-Kontext-pro": { "litellm_provider": "azure_ai", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure_ai/Llama-3.2-11B-Vision-Instruct": { "input_cost_per_token": 3.7e-7, @@ -4853,36 +4229,28 @@ "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://devblogs.microsoft.com/foundry/whats-new-in-azure-ai-foundry-august-2025/#mistral-document-ai-(ocr)-%E2%80%94-serverless-in-foundry" }, "azure_ai/doc-intelligence/prebuilt-read": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.0015, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-layout": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.01, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-document": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.01, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/MAI-DS-R1": { @@ -4974,13 +4342,8 @@ "output_cost_per_token": 0, 
"output_vector_size": 3072, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image"], "supports_embedding_image_input": true }, "azure_ai/global/grok-3": { @@ -7140,16 +6503,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -7354,16 +6710,9 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -7383,9 +6732,7 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": [ - "/v1/chat/completions" - ], + "supported_endpoints": ["/v1/chat/completions"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -7404,9 +6751,7 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": [ - "/v1/chat/completions" - ], + "supported_endpoints": ["/v1/chat/completions"], 
"supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -7443,18 +6788,12 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7472,18 +6811,12 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7569,19 +6902,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7600,19 +6927,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7631,19 +6952,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7728,37 +7043,25 @@ "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [ - 0, - 32000 - ] + 
"range": [0, 32000] }, { "cache_read_input_token_cost": 1.2e-7, "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "cache_read_input_token_cost": 2e-7, "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "cache_read_input_token_cost": 4e-7, "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7776,34 +7079,22 @@ { "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7822,37 +7113,25 @@ "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "cache_read_input_token_cost": 1.8e-7, "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "cache_read_input_token_cost": 6e-7, "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7870,34 +7149,22 @@ { "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { 
"input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -7915,26 +7182,17 @@ { "input_cost_per_token": 0.0000012, "output_cost_per_token": 0.000006, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "input_cost_per_token": 0.0000024, "output_cost_per_token": 0.000012, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 252000 - ] + "range": [128000, 252000] } ] }, @@ -8439,9 +7697,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-conversationalai": { "input_cost_per_second": 0.00020833, @@ -8453,9 +7709,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-finance": { "input_cost_per_second": 0.00020833, @@ -8467,9 +7721,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-general": { "input_cost_per_second": 0.00020833, @@ -8481,9 +7733,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": 
["/v1/audio/transcriptions"] }, "deepgram/base-meeting": { "input_cost_per_second": 0.00020833, @@ -8495,9 +7745,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-phonecall": { "input_cost_per_second": 0.00020833, @@ -8509,9 +7757,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-video": { "input_cost_per_second": 0.00020833, @@ -8523,9 +7769,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-voicemail": { "input_cost_per_second": 0.00020833, @@ -8537,9 +7781,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced": { "input_cost_per_second": 0.00024167, @@ -8551,9 +7793,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-finance": { "input_cost_per_second": 0.00024167, @@ -8565,9 +7805,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-general": { "input_cost_per_second": 0.00024167, @@ -8579,9 +7817,7 @@ "mode": "audio_transcription", 
"output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-meeting": { "input_cost_per_second": 0.00024167, @@ -8593,9 +7829,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-phonecall": { "input_cost_per_second": 0.00024167, @@ -8607,9 +7841,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova": { "input_cost_per_second": 0.00007167, @@ -8621,9 +7853,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2": { "input_cost_per_second": 0.00007167, @@ -8635,9 +7865,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-atc": { "input_cost_per_second": 0.00007167, @@ -8649,9 +7877,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-automotive": { "input_cost_per_second": 0.00007167, @@ -8663,9 +7889,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": 
["/v1/audio/transcriptions"] }, "deepgram/nova-2-conversationalai": { "input_cost_per_second": 0.00007167, @@ -8677,9 +7901,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-drivethru": { "input_cost_per_second": 0.00007167, @@ -8691,9 +7913,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-finance": { "input_cost_per_second": 0.00007167, @@ -8705,9 +7925,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-general": { "input_cost_per_second": 0.00007167, @@ -8719,9 +7937,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-meeting": { "input_cost_per_second": 0.00007167, @@ -8733,9 +7949,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-phonecall": { "input_cost_per_second": 0.00007167, @@ -8747,9 +7961,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-video": { "input_cost_per_second": 0.00007167, @@ -8761,9 +7973,7 @@ "mode": 
"audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-voicemail": { "input_cost_per_second": 0.00007167, @@ -8775,9 +7985,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3": { "input_cost_per_second": 0.00007167, @@ -8789,9 +7997,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3-general": { "input_cost_per_second": 0.00007167, @@ -8803,9 +8009,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3-medical": { "input_cost_per_second": 0.00008667, @@ -8817,9 +8021,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-general": { "input_cost_per_second": 0.00007167, @@ -8831,9 +8033,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-phonecall": { "input_cost_per_second": 0.00007167, @@ -8845,9 +8045,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + 
"supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper": { "input_cost_per_second": 0.0001, @@ -8858,9 +8056,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-base": { "input_cost_per_second": 0.0001, @@ -8871,9 +8067,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-large": { "input_cost_per_second": 0.0001, @@ -8884,9 +8078,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-medium": { "input_cost_per_second": 0.0001, @@ -8897,9 +8089,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-small": { "input_cost_per_second": 0.0001, @@ -8910,9 +8100,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-tiny": { "input_cost_per_second": 0.0001, @@ -8923,9 +8111,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepinfra/Gryphe/MythoMax-L2-13b": { "max_tokens": 4096, @@ -9782,17 +8968,11 @@ "tiered_pricing": [ { "input_cost_per_query": 0.005, - 
"max_results_range": [ - 0, - 25 - ] + "max_results_range": [0, 25] }, { "input_cost_per_query": 0.025, - "max_results_range": [ - 26, - 100 - ] + "max_results_range": [26, 100] } ] }, @@ -9802,73 +8982,43 @@ "tiered_pricing": [ { "input_cost_per_query": 0.00166, - "max_results_range": [ - 1, - 10 - ] + "max_results_range": [1, 10] }, { "input_cost_per_query": 0.00332, - "max_results_range": [ - 11, - 20 - ] + "max_results_range": [11, 20] }, { "input_cost_per_query": 0.00498, - "max_results_range": [ - 21, - 30 - ] + "max_results_range": [21, 30] }, { "input_cost_per_query": 0.00664, - "max_results_range": [ - 31, - 40 - ] + "max_results_range": [31, 40] }, { "input_cost_per_query": 0.0083, - "max_results_range": [ - 41, - 50 - ] + "max_results_range": [41, 50] }, { "input_cost_per_query": 0.00996, - "max_results_range": [ - 51, - 60 - ] + "max_results_range": [51, 60] }, { "input_cost_per_query": 0.01162, - "max_results_range": [ - 61, - 70 - ] + "max_results_range": [61, 70] }, { "input_cost_per_query": 0.01328, - "max_results_range": [ - 71, - 80 - ] + "max_results_range": [71, 80] }, { "input_cost_per_query": 0.01494, - "max_results_range": [ - 81, - 90 - ] + "max_results_range": [81, 90] }, { "input_cost_per_query": 0.0166, - "max_results_range": [ - 91, - 100 - ] + "max_results_range": [91, 100] } ], "metadata": { @@ -9899,9 +9049,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "elevenlabs/scribe_v1_experimental": { "input_cost_per_second": 0.0000611, @@ -9914,9 +9062,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "embed-english-light-v2.0": { "input_cost_per_token": 1e-7, @@ -10300,97 +9446,73 @@ 
"litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/flux-pro/v1.1": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.04, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/flux-pro/v1.1-ultra": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.06, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/flux/schnell": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.003, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/bytedance/seedream/v3/text-to-image": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.03, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.03, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/ideogram/v3": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.06, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/imagen4/preview": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/imagen4/preview/fast": { "litellm_provider": "fal_ai", "mode": 
"image_generation", "output_cost_per_image": 0.02, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/imagen4/preview/ultra": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.06, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/recraft/v3/text-to-image": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/stable-diffusion-v35-medium": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "featherless_ai/featherless-ai/Qwerky-72B": { "litellm_provider": "featherless_ai", @@ -11590,16 +10712,8 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -11630,16 +10744,8 @@ "mode": "chat", "output_cost_per_token": 6e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11678,16 +10784,8 @@ 
"output_cost_per_token": 6e-7, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11714,15 +10812,8 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11750,15 +10841,8 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11790,20 +10874,9 @@ "output_cost_per_token": 0.000002, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini#gemini-2-0-flash-live-preview-04-09", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", 
"/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -11834,16 +10907,8 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -11883,16 +10948,8 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11931,16 +10988,8 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": false, "supports_parallel_function_calling": true, @@ -11970,19 +11019,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - 
"supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12013,20 +11052,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12060,21 +11088,9 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12108,21 +11124,9 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": 
"https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12150,19 +11154,9 @@ "output_cost_per_token": 0.000012, "output_cost_per_token_batches": 0.000006, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": false, "supports_prompt_caching": true, "supports_response_schema": true, @@ -12188,20 +11182,9 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12233,20 +11216,9 @@ 
"output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12278,20 +11250,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12323,20 +11284,9 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_token": 0.000002, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", 
"audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -12369,20 +11319,9 @@ "output_cost_per_token": 0.000002, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -12415,20 +11354,9 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12460,20 +11388,9 @@ "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + 
"supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12504,20 +11421,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12550,19 +11456,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -12597,20 +11493,9 @@ "output_cost_per_token_above_200k_tokens": 0.000018, "output_cost_per_token_batches": 0.000006, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + 
"supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -12645,20 +11530,9 @@ "output_cost_per_token_above_200k_tokens": 0.000018, "output_cost_per_token_batches": 0.000006, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -12689,19 +11563,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12733,20 +11597,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - 
], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12778,23 +11631,10 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], - "supported_regions": [ - "global" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supported_regions": ["global"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12826,20 +11666,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12871,12 +11700,8 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": 
"https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13321,16 +12146,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13361,16 +12178,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -13410,16 +12219,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -13447,15 +12248,8 @@ "output_cost_per_token": 3e-7, "rpm": 4000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", - "supported_modalities": 
[ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -13484,15 +12278,8 @@ "output_cost_per_token": 3e-7, "rpm": 60000, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -13524,20 +12311,9 @@ "output_cost_per_token": 0.0000015, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2-0-flash-live-001", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -13569,16 +12345,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13619,16 +12387,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 
10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -13668,16 +12428,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -13749,20 +12501,9 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13798,21 +12539,9 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - 
"supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13846,21 +12575,9 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13890,19 +12607,9 @@ "tpm": 4000000, "output_cost_per_token_batches": 0.000006, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": false, "supports_prompt_caching": true, "supports_response_schema": true, @@ -13929,20 +12636,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - 
"video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13976,20 +12672,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -14023,20 +12708,9 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -14070,20 +12744,9 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": 
"https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -14117,20 +12780,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -14164,20 +12816,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, 
"supports_function_calling": true, "supports_parallel_function_calling": true, @@ -14211,19 +12852,9 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -14255,19 +12886,9 @@ "output_cost_per_token": 0.0000025, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -14300,16 +12921,9 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -14340,19 +12954,9 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 2000, 
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -14388,20 +12992,9 @@ "output_cost_per_token_batches": 0.000006, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -14434,19 +13027,9 @@ "output_cost_per_token_above_200k_tokens": 0, "rpm": 5, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -14479,15 +13062,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - 
"supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -14519,15 +13095,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -14560,15 +13129,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -14601,12 +13163,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -14833,12 +13391,8 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] 
+ "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.0-fast-generate-preview": { "litellm_provider": "gemini", @@ -14847,12 +13401,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.0-generate-preview": { "litellm_provider": "gemini", @@ -14861,12 +13411,8 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.1-fast-generate-preview": { "litellm_provider": "gemini", @@ -14875,12 +13421,8 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.1-generate-preview": { "litellm_provider": "gemini", @@ -14889,12 +13431,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "google_pse/search": { "input_cost_per_query": 0.005, @@ -15288,18 +13826,9 @@ "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, "output_cost_per_token_priority": 0.000014, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - 
"text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15322,18 +13851,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15359,18 +13879,9 @@ "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "output_cost_per_token_priority": 0.0000028, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15393,18 +13904,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], 
"supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15430,18 +13932,9 @@ "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, "output_cost_per_token_priority": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15464,18 +13957,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15858,9 +14342,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -15869,16 +14351,9 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/speech" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "audio" - ] + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + 
"supported_output_modalities": ["audio"] }, "gpt-4o-realtime-preview": { "cache_read_input_token_cost": 0.0000025, @@ -16007,9 +14482,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -16026,18 +14499,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16062,18 +14526,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16098,18 +14553,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + 
"supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16134,18 +14580,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -16167,17 +14604,9 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 0.00006, - "supported_endpoints": [ - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -16200,17 +14629,9 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 0.00006, - "supported_endpoints": [ - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -16238,18 +14659,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, 
"output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16271,18 +14683,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -16303,18 +14706,9 @@ "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -16335,16 +14729,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": 
["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16368,16 +14755,9 @@ "mode": "responses", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16398,16 +14778,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16431,16 +14804,9 @@ "mode": "responses", "output_cost_per_token": 0.000002, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16467,18 +14833,9 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - 
"image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16506,18 +14863,9 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16543,18 +14891,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16578,18 +14917,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + 
"supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16606,9 +14936,7 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "gpt-image-1-mini": { "cache_read_input_image_token_cost": 2.5e-7, @@ -16618,10 +14946,7 @@ "litellm_provider": "openai", "mode": "chat", "output_cost_per_image_token": 0.000008, - "supported_endpoints": [ - "/v1/images/generations", - "/v1/images/edits" - ] + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 4e-7, @@ -16636,18 +14961,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -16667,18 +14983,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 0.0000024, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -16699,18 +15006,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": [ - 
"/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -16722,12 +15020,8 @@ "litellm_provider": "gradient_ai", "max_tokens": 2048, "mode": "chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3-opus": { @@ -16736,12 +15030,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000075, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-haiku": { @@ -16750,12 +15040,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-sonnet": { @@ -16764,12 +15050,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.7-sonnet": { @@ -16778,12 +15060,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": [ - "/v1/chat/completions" - ], - 
"supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/deepseek-r1-distill-llama-70b": { @@ -16792,12 +15070,8 @@ "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 9.9e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/llama3-8b-instruct": { @@ -16806,12 +15080,8 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/llama3.3-70b-instruct": { @@ -16820,12 +15090,8 @@ "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 6.5e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/mistral-nemo-instruct-2407": { @@ -16834,36 +15100,24 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 3e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o-mini": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": 
"chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-o3": { @@ -16872,12 +15126,8 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-o3-mini": { @@ -16886,12 +15136,8 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { @@ -17374,27 +15620,21 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { "input_cost_per_token": 1.2e-7, @@ -18053,27 +16293,21 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + 
"supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "luminous-base": { "input_cost_per_token": 0.00003, @@ -18136,75 +16370,57 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.005, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", 
"mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.011, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medlm-large": { "input_cost_per_character": 0.000005, @@ -18364,14 +16580,8 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -18385,14 +16595,8 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -18403,12 +16607,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["text"], 
"supports_function_calling": true, "supports_tool_choice": true }, @@ -18419,12 +16619,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -18435,13 +16631,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -18452,13 +16643,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -18626,9 +16812,7 @@ "ocr_cost_per_page": 0.001, "annotation_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/mistral-ocr-2505-completion": { @@ -18636,9 +16820,7 @@ "ocr_cost_per_page": 0.001, "annotation_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/magistral-medium-latest": { @@ -19283,14 +17465,8 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image", - 
"video" - ] + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image", "video"] }, "multimodalembedding@001": { "input_cost_per_character": 2e-7, @@ -19306,14 +17482,8 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image", - "video" - ] + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image", "video"] }, "nscale/Qwen/QwQ-32B": { "input_cost_per_token": 1.8e-7, @@ -19349,9 +17519,7 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 3.75e-7, @@ -19456,9 +17624,7 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "o1": { "cache_read_input_token_cost": 0.0000075, @@ -19564,17 +17730,9 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -19596,17 +17754,9 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - 
"supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -19639,13 +17789,8 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -19671,13 +17816,8 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -19699,18 +17839,9 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -19732,18 +17863,9 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - 
], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -19798,17 +17920,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -19828,17 +17942,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -19903,18 +18009,9 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, 
"supports_parallel_function_calling": true, @@ -19936,18 +18033,9 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -20959,20 +19047,9 @@ "output_cost_per_token": 0.000012, "output_cost_per_token_above_200k_tokens": 0.000018, "output_cost_per_token_batches": 0.000006, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -21354,13 +19431,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -21373,13 +19445,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ 
-21392,13 +19459,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -21411,13 +19473,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -21430,13 +19487,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -22381,18 +20433,14 @@ "mode": "image_generation", "output_cost_per_image": 0.022, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "recraft/recraftv3": { "litellm_provider": "recraft", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "replicate/meta/llama-2-13b": { "input_cost_per_token": 1e-7, @@ -23734,17 +21782,13 @@ "input_cost_per_character": 0.000015, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": [ - "/v1/audio/speech" - ] + "supported_endpoints": ["/v1/audio/speech"] }, "tts-1-hd": { "input_cost_per_character": 0.00003, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": [ - "/v1/audio/speech" - ] + 
"supported_endpoints": ["/v1/audio/speech"] }, "us.amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -24230,14 +22274,8 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -24251,14 +22289,8 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -25176,9 +23208,7 @@ "litellm_provider": "vertex_ai", "mode": "audio_speech", "source": "https://cloud.google.com/text-to-speech/pricing", - "supported_endpoints": [ - "/v1/audio/speech" - ] + "supported_endpoints": ["/v1/audio/speech"] }, "vertex_ai/claude-3-5-haiku": { "input_cost_per_token": 0.000001, @@ -25720,9 +23750,7 @@ "mode": "chat", "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_regions": [ - "us-west2" - ], + "supported_regions": ["us-west2"], "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -25764,21 +23792,9 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation#edit-an-image", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": 
["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -25972,14 +23988,8 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -25992,14 +24002,8 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -26012,14 +24016,8 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -26032,14 +24030,8 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, 
"supports_tool_choice": true }, @@ -26238,9 +24230,7 @@ "litellm_provider": "vertex_ai", "mode": "ocr", "ocr_cost_per_page": 0.0005, - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://cloud.google.com/generative-ai-app-builder/pricing" }, "vertex_ai/openai/gpt-oss-120b-maas": { @@ -26320,12 +24310,8 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.0-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -26334,12 +24320,8 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.0-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -26348,12 +24330,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.0-fast-generate-001": { "litellm_provider": "vertex_ai-video-models", @@ -26362,12 +24340,8 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.0-generate-001": { "litellm_provider": "vertex_ai-video-models", @@ -26376,12 +24350,8 @@ "mode": "video_generation", 
"output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.1-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -26390,12 +24360,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.1-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -26404,12 +24370,8 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "voyage/rerank-2": { "input_cost_per_query": 5e-8, @@ -27032,18 +24994,14 @@ "output_cost_per_second": 0.0001, "litellm_provider": "watsonx", "mode": "audio_transcription", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "whisper-1": { "input_cost_per_second": 0.0001, "litellm_provider": "openai", "mode": "audio_transcription", "output_cost_per_second": 0.0001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "xai/grok-2": { "input_cost_per_token": 0.000002, @@ -27629,99 +25587,54 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.1, "source": "https://platform.openai.com/docs/api-reference/videos", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - 
"supported_resolutions": [ - "720x1280", - "1280x720" - ] + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] }, "openai/sora-2-pro": { "litellm_provider": "openai", "mode": "video_generation", "output_cost_per_video_per_second": 0.3, "source": "https://platform.openai.com/docs/api-reference/videos", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "720x1280", - "1280x720" - ] + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] }, "azure/sora-2": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.1, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "720x1280", - "1280x720" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] }, "azure/sora-2-pro": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.3, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "720x1280", - "1280x720" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] }, "azure/sora-2-pro-high-res": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.5, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ], - 
"supported_resolutions": [ - "1024x1792", - "1792x1024" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1024x1792", "1792x1024"] }, "runwayml/gen4_turbo": { "litellm_provider": "runwayml", "mode": "video_generation", "output_cost_per_video_per_second": 0.05, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "1280x720", - "720x1280" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], "metadata": { "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" } @@ -27731,17 +25644,9 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.15, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "1280x720", - "720x1280" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], "metadata": { "comment": "15 credits per second @ $0.01 per credit = $0.15 per second" } @@ -27751,17 +25656,9 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.05, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "1280x720", - "720x1280" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], "metadata": { "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" } @@ -27772,17 +25669,9 @@ "input_cost_per_image": 0.05, "output_cost_per_image": 0.05, "source": 
"https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "image" - ], - "supported_resolutions": [ - "1280x720", - "1920x1080" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["image"], + "supported_resolutions": ["1280x720", "1920x1080"], "metadata": { "comment": "5 credits per 720p image or 8 credits per 1080p image @ $0.01 per credit. Using 5 credits ($0.05) as base cost" } @@ -27793,17 +25682,9 @@ "input_cost_per_image": 0.02, "output_cost_per_image": 0.02, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "image" - ], - "supported_resolutions": [ - "1280x720", - "1920x1080" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["image"], + "supported_resolutions": ["1280x720", "1920x1080"], "metadata": { "comment": "2 credits per image (any resolution) @ $0.01 per credit = $0.02 per image" } @@ -27817,4 +25698,4 @@ "comment": "Estimated cost based on standard TTS pricing. RunwayML uses ElevenLabs models." } } -} \ No newline at end of file +}