diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index f052cbffbdb..8b05b811ba2 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1155,6 +1155,7 @@ async def run_test_real( use_enhanced_map=True, verbose=verbose, yes_always_commands=True, + max_reflections=0, ), map_mul_no_files=4, mcp_manager=None, diff --git a/cecli/__init__.py b/cecli/__init__.py index 3323e971c76..7cee4865eb4 100644 --- a/cecli/__init__.py +++ b/cecli/__init__.py @@ -1,6 +1,6 @@ from packaging import version -__version__ = "0.97.1.dev" +__version__ = "0.97.3.dev" safe_version = __version__ try: diff --git a/cecli/args.py b/cecli/args.py index 7c1de5b0315..b87b4cfaddf 100644 --- a/cecli/args.py +++ b/cecli/args.py @@ -254,6 +254,12 @@ def get_parser(default_config_files, git_root): " If unspecified, defaults to the model's max_chat_history_tokens." ), ) + group.add_argument( + "--max-reflections", + type=int, + default=3, + help="Maximum number of retries a model gets on malformed outputs (default: 3)", + ) group.add_argument( "--file-diffs", action=argparse.BooleanOptionalAction, diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py index dd4ebdea870..eac72038b7b 100644 --- a/cecli/coders/agent_coder.py +++ b/cecli/coders/agent_coder.py @@ -7,7 +7,7 @@ import random import time import traceback -from collections import Counter, defaultdict +from collections import defaultdict from datetime import datetime from pathlib import Path @@ -85,6 +85,7 @@ def __init__(self, *args, **kwargs): self.allowed_context_blocks = set() self.context_block_tokens = {} self.context_blocks_cache = {} + self.hot_reload_enabled = False self.tokens_calculated = False self.skip_cli_confirmations = False self.agent_finished = False @@ -119,6 +120,7 @@ def _get_agent_config(self): config, "skip_cli_confirmations", nested.getter(config, "yolo", []) ) config["command_timeout"] = nested.getter(config, "command_timeout", 30) + config["hot_reload"] = nested.getter(config, "hot_reload", False) config["tools_paths"] = nested.getter(config, "tools_paths", []) config["tools_includelist"] = nested.getter( @@ -147,7 +149,7 @@ def _get_agent_config(self): self.large_file_token_threshold = config["large_file_token_threshold"] self.skip_cli_confirmations = config["skip_cli_confirmations"] - + self.hot_reload_enabled = config["hot_reload"] self.allowed_context_blocks = config["include_context_blocks"] for context_block in config["exclude_context_blocks"]: @@ -259,6 +261,7 @@ async def _execute_local_tool_calls(self, tool_calls_list): try: parsed_args_list.append(json.loads(chunk)) except json.JSONDecodeError as e: + self.model_kwargs = {} self.io.tool_warning( f"Could not parse JSON chunk for tool {tool_name}: {chunk}" ) @@ -320,6 +323,7 @@ async def _execute_local_tool_calls(self, tool_calls_list): result_message = "\n\n".join(all_results_content) except Exception as e: + self.model_kwargs = {} result_message = f"Error executing {tool_name}: {e}" self.io.tool_error(f"""Error during {tool_name} execution: {e} {traceback.format_exc()}""") @@ -843,6 +847,10 @@ async def reply_completed(self): self.files_edited_by_tools = set() return False + async def hot_reload(self): + if self.hot_reload_enabled: + self.skills_manager.hot_reload() + async def _execute_tool_with_registry(self, norm_tool_name, params): """ Execute a tool using the tool registry. @@ -879,12 +887,9 @@ def _get_repetitive_tools(self): """ Identifies repetitive tool usage patterns from rounds of tool calls. - This method combines count-based and similarity-based detection: + This method uses similarity-based detection: 1. If the last round contained a write tool, it assumes progress and returns no repetitive tools. - 2. It checks for any read tool that has been used 2 or more times across rounds. - 3. If no tools are repeated, but all tools in the history are read tools, - it flags all of them as potentially repetitive. - 4. It checks for similarity-based repetition using cosine similarity on tool call strings. + 2. It checks for similarity-based repetition using cosine similarity on tool call strings. It avoids flagging repetition if a "write" tool was used recently, as that suggests progress is being made. @@ -893,9 +898,6 @@ def _get_repetitive_tools(self): if history_len < 5: return set() similarity_repetitive_tools = self._get_repetitive_tools_by_similarity() - all_tools = [] - for round_tools in self.tool_usage_history: - all_tools.extend(round_tools) if self.last_round_tools: last_round_has_write = any( tool.lower() in self.write_tools for tool in self.last_round_tools @@ -909,24 +911,14 @@ def _get_repetitive_tools(self): if tool.lower() in self.read_tools or tool.lower() in self.write_tools } return filtered_similarity_tools if len(filtered_similarity_tools) else set() - if all(tool.lower() in self.read_tools for tool in all_tools): - # Only return tools that are in read_tools - return {tool for tool in all_tools if tool.lower() in self.read_tools} - tool_counts = Counter(all_tools) - count_repetitive_tools = { - tool - for tool, count in tool_counts.items() - if count >= 5 and tool.lower() in self.read_tools - } # Filter similarity_repetitive_tools to only include tools in read_tools or write_tools filtered_similarity_tools = { tool for tool in similarity_repetitive_tools if tool.lower() in self.read_tools or tool.lower() in self.write_tools } - repetitive_tools = count_repetitive_tools.union(filtered_similarity_tools) - if repetitive_tools: - return repetitive_tools + if filtered_similarity_tools: + return filtered_similarity_tools return set() def _get_repetitive_tools_by_similarity(self): @@ -983,6 +975,27 @@ def _generate_tool_context(self, repetitive_tools): context_parts.append("\n\n") if repetitive_tools: + if not self.model_kwargs: + self.model_kwargs = { + "temperature": (self.main_model.use_temperature or 1) + 0.1, + "frequency_penalty": 0.2, + "presence_penalty": 0.1, + } + else: + temperature = nested.getter(self.model_kwargs, "temperature") + freq_penalty = nested.getter(self.model_kwargs, "frequency_penalty") + if temperature and freq_penalty: + self.model_kwargs["temperature"] = min(temperature + 0.1, 2) + self.model_kwargs["frequency_penalty"] = min(freq_penalty + 0.1, 1) + + if random.random() < 0.25: + self.model_kwargs["temperature"] = max(temperature - 0.2, 1) + self.model_kwargs["frequency_penalty"] = max(freq_penalty - 0.2, 0) + + # One tenth of the time, just straight reset the randomness + if random.random() < 0.1: + self.model_kwargs = {} + if self.turn_count - self._last_repetitive_warning_turn > 2: self._last_repetitive_warning_turn = self.turn_count self._last_repetitive_warning_severity += 1 @@ -1040,7 +1053,7 @@ def _generate_tool_context(self, repetitive_tools): repetition_warning += f""" ### CRITICAL: Execution Loop Detected -You are currently "spinning." To break the logic trap, you must: +You are currently "spinning gears". To break the exploration loop, you must: 1. **Analyze**: Use the `Thinking` tool to summarize exactly what you have found so far and why you were stuck. 2. **Pivot**: Abandon or modify your current exploration strategy. Try focusing on different files or running tests. 3. **Reframe**: To ensure your logic reset, include a 2-sentence story about {animal} {verb} {fruit} in your thoughts. @@ -1049,6 +1062,9 @@ def _generate_tool_context(self, repetitive_tools): """ context_parts.append(repetition_warning) + else: + self.model_kwargs = {} + context_parts.append("") return "\n".join(context_parts) diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index 77ee3e7ff74..ffeb31dae47 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -156,6 +156,7 @@ class Coder: tool_reflection = False last_user_message = "" uuid = "" + model_kwargs = {} # Task coordination state variables input_running = False @@ -380,6 +381,9 @@ def __init__( self.context_compaction_max_tokens = context_compaction_max_tokens self.context_compaction_summary_tokens = context_compaction_summary_tokens + self.max_reflections = ( + 3 if self.edit_format == "agent" else nested.getter(self.args, "max_reflections", 3) + ) if not fnames: fnames = [] @@ -1615,6 +1619,8 @@ async def run_one(self, user_message, preproc): async for _ in self.send_message(message): pass + await self.hot_reload() + if not self.reflected_message: await self.auto_save_session(force=True) break @@ -2750,6 +2756,9 @@ def get_tool_list(self): async def reply_completed(self): pass + async def hot_reload(self): + pass + async def show_exhausted_error(self): output_tokens = 0 if self.partial_response_content: @@ -2995,6 +3004,7 @@ async def send(self, messages, model=None, functions=None, tools=None): self.temperature, # This could include any tools, but for now it is just MCP tools tools=tools, + override_kwargs=self.model_kwargs, ) self.chat_completion_call_hashes.append(hash_object.hexdigest()) @@ -3166,7 +3176,7 @@ async def show_send_output_stream(self, completion): reasoning_content = None if reasoning_content: - if nested.getter(self, "args.show_thinking"): + if nested.getter(self.args, "show_thinking"): if not self.got_reasoning_content: text += f"<{REASONING_TAG}>\n\n" text += reasoning_content @@ -3202,7 +3212,7 @@ async def show_send_output_stream(self, completion): self.stream_wrapper(content_to_show, final=False) elif text: # Apply reasoning tag formatting for non-pretty output - if nested.getter(self, "args.show_thinking"): + if nested.getter(self.args, "show_thinking"): text = replace_reasoning_tags(text, self.reasoning_tag_name) try: self.stream_wrapper(text, final=False) @@ -3405,7 +3415,7 @@ def stream_wrapper(self, content, final): def live_incremental_response(self, final): show_resp = self.render_incremental_response(final) # Apply any reasoning tag formatting - if nested.getter(self, "args.show_thinking"): + if nested.getter(self.args, "show_thinking"): show_resp = replace_reasoning_tags(show_resp, self.reasoning_tag_name) # Track streaming state to avoid repetitive output diff --git a/cecli/commands/save_session.py b/cecli/commands/save_session.py index 799ab817d12..16a96d61453 100644 --- a/cecli/commands/save_session.py +++ b/cecli/commands/save_session.py @@ -25,9 +25,12 @@ async def execute(cls, io, coder, args, **kwargs): @classmethod def get_completions(cls, io, coder, args) -> List[str]: """Get completion options for save-session command.""" - # For save-session, we could return existing session names for completion - # For now, return empty list - return [] + # Return existing session names for completion to prevent accidental overwrites + from cecli import sessions + + session_manager = sessions.SessionManager(coder, io) + sessions_list = session_manager.list_sessions() + return [session_info["name"] for session_info in sessions_list] @classmethod def get_help(cls) -> str: @@ -40,4 +43,8 @@ def get_help(cls) -> str: help_text += " /save-session bug-fix # Save session as 'bug-fix'\n" help_text += "\nSessions are saved in the .cecli/sessions/ directory as JSON files.\n" help_text += "Use /list-sessions to see saved sessions and /load-session to load them.\n" + help_text += ( + "\nNote: Existing session names will be shown for tab completion to help prevent" + " accidental overwrites.\n" + ) return help_text diff --git a/cecli/helpers/conversation/integration.py b/cecli/helpers/conversation/integration.py index 30bd2b00856..66ba663a946 100644 --- a/cecli/helpers/conversation/integration.py +++ b/cecli/helpers/conversation/integration.py @@ -750,6 +750,7 @@ def add_static_context_blocks(cls, coder) -> None: message_dict={"role": "user", "content": block_content}, tag=MessageTag.STATIC, hash_key=("static", block_type), + force=True, ) @classmethod @@ -793,6 +794,7 @@ def add_pre_message_context_blocks(cls, coder) -> None: tag=MessageTag.STATIC, # Use STATIC tag but with different priority priority=125, # Between REPO (100) and READONLY_FILES (200) hash_key=("pre_message", block_type), + force=True, ) @classmethod diff --git a/cecli/helpers/nested.py b/cecli/helpers/nested.py index bdd88ab4f01..624a001df4b 100644 --- a/cecli/helpers/nested.py +++ b/cecli/helpers/nested.py @@ -81,3 +81,18 @@ def getter( return current return default + + +def deep_merge(dict1, dict2): + """ + Recursively merges dict2 into dict1. + If a key exists in both and both values are dicts, it merges the sub-dicts. + Otherwise, the value from dict2 overwrites the value from dict1. + """ + merged = dict1.copy() # Create a copy to avoid modifying original dict1 in place + for key, value in dict2.items(): + if key in merged and isinstance(merged[key], dict) and isinstance(value, dict): + merged[key] = deep_merge(merged[key], value) + else: + merged[key] = value + return merged diff --git a/cecli/helpers/skills.py b/cecli/helpers/skills.py index 6ad8df3349d..06c7fd24ff1 100644 --- a/cecli/helpers/skills.py +++ b/cecli/helpers/skills.py @@ -119,6 +119,11 @@ def find_skills(self, reload: bool = False) -> List[SkillMetadata]: self._skills_find_cache = skills return skills + def hot_reload(self): + self._skills_cache = {} + self._skill_metadata_cache = {} + self.find_skills(reload=True) + def _parse_skill_metadata(self, skill_md_path: Path) -> SkillMetadata: """ Parse the metadata from a SKILL.md file. diff --git a/cecli/main.py b/cecli/main.py index b27f9f3b251..bc6e9dfe5e9 100644 --- a/cecli/main.py +++ b/cecli/main.py @@ -645,7 +645,9 @@ def get_io(pretty): input_queue = None pre_init_io = get_io(args.pretty) # Check if we're in "send message and exit" mode to skip non-essential initialization - suppress_pre_init = args.message or args.message_file or args.apply_clipboard_edits + suppress_pre_init = ( + args.message or args.message_file or args.apply_clipboard_edits or args.terminal_setup + ) supress_tui = True if not suppress_pre_init: @@ -1158,6 +1160,8 @@ def apply_model_overrides(model_name): await coder.commands.execute("terminal-setup", "dry_run") else: await coder.commands.execute("terminal-setup", "") + return await graceful_exit(coder) + if args.lint or args.test or args.commit: return await graceful_exit(coder) if args.show_repo_map: diff --git a/cecli/models.py b/cecli/models.py index 418b7188101..d680d7e24ae 100644 --- a/cecli/models.py +++ b/cecli/models.py @@ -22,6 +22,7 @@ from cecli.helpers import nested from cecli.helpers.file_searcher import handle_core_files from cecli.helpers.model_providers import ModelProviderManager +from cecli.helpers.nested import deep_merge from cecli.helpers.requests import model_request_parser from cecli.llm import litellm from cecli.sendchat import sanity_check_messages @@ -955,6 +956,7 @@ async def send_completion( max_tokens=None, min_wait=0, max_wait=2, + override_kwargs={}, ): if os.environ.get("CECLI_SANITY_CHECK_TURNS"): sanity_check_messages(messages) @@ -1053,6 +1055,8 @@ async def send_completion( if random.random() < 0.25: await asyncio.sleep(random.uniform(min_wait, max_wait)) + if override_kwargs: + kwargs = deep_merge(kwargs, override_kwargs) res = await litellm.acompletion(**kwargs) return hash_object, res except litellm.ContextWindowExceededError as err: diff --git a/cecli/resources/model-metadata.json b/cecli/resources/model-metadata.json index bcce123910b..2ee97110a60 100644 --- a/cecli/resources/model-metadata.json +++ b/cecli/resources/model-metadata.json @@ -120,6 +120,25 @@ "supports_video_input": true, "supports_vision": true }, + "amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-7, + "input_cost_per_token": 0.0000021875, + "input_cost_per_image_token": 0.0000021875, + "input_cost_per_audio_token": 0.0000021875, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "apac.amazon.nova-2-lite-v1:0": { "cache_read_input_token_cost": 8.25e-8, "input_cost_per_token": 3.3e-7, @@ -137,6 +156,25 @@ "supports_video_input": true, "supports_vision": true }, + "apac.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-7, + "input_cost_per_token": 0.0000021875, + "input_cost_per_image_token": 0.0000021875, + "input_cost_per_audio_token": 0.0000021875, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "eu.amazon.nova-2-lite-v1:0": { "cache_read_input_token_cost": 8.25e-8, "input_cost_per_token": 3.3e-7, @@ -154,6 +192,25 @@ "supports_video_input": true, "supports_vision": true }, + "eu.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-7, + "input_cost_per_token": 0.0000021875, + "input_cost_per_image_token": 0.0000021875, + "input_cost_per_audio_token": 0.0000021875, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "us.amazon.nova-2-lite-v1:0": { "cache_read_input_token_cost": 8.25e-8, "input_cost_per_token": 3.3e-7, @@ -171,6 +228,25 @@ "supports_video_input": true, "supports_vision": true }, + "us.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-7, + "input_cost_per_token": 0.0000021875, + "input_cost_per_image_token": 0.0000021875, + "input_cost_per_audio_token": 0.0000021875, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "amazon.nova-micro-v1:0": { "input_cost_per_token": 3.5e-8, "litellm_provider": "bedrock_converse", @@ -304,12 +380,13 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_streaming": true }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", - "max_input_tokens": 200000, + "max_input_tokens": 1000000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", @@ -318,14 +395,22 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.00003, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "cache_creation_input_token_cost_above_1hr": 0.0000075, + "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7 }, "anthropic.claude-3-5-sonnet-20241022-v2:0": { "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", - "max_input_tokens": 200000, + "max_input_tokens": 1000000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", @@ -337,7 +422,13 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.00003, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "cache_creation_input_token_cost_above_1hr": 0.0000075, + "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.000015 }, "anthropic.claude-3-7-sonnet-20240620-v1:0": { "cache_creation_input_token_cost": 0.0000045, @@ -391,7 +482,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-8, + "cache_creation_input_token_cost": 3.125e-7 }, "anthropic.claude-3-opus-20240229-v1:0": { "input_cost_per_token": 0.000015, @@ -404,7 +497,9 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 0.0000015, + "cache_creation_input_token_cost": 0.00001875 }, "anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 0.000003, @@ -418,7 +513,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "anthropic.claude-instant-v1": { "input_cost_per_token": 8e-7, @@ -508,20 +605,170 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "anthropic.claude-sonnet-4-20250514-v1:0": { + "anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "global.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "us.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_200k_tokens": 0.00001375, + "cache_read_input_token_cost": 5.5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.0000011, + "input_cost_per_token": 0.0000055, + "input_cost_per_token_above_200k_tokens": 0.000011, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "output_cost_per_token_above_200k_tokens": 0.00004125, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "eu.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_200k_tokens": 0.00001375, + "cache_read_input_token_cost": 5.5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.0000011, + "input_cost_per_token": 0.0000055, + "input_cost_per_token_above_200k_tokens": 0.000011, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "output_cost_per_token_above_200k_tokens": 0.00004125, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "au.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_200k_tokens": 0.00001375, + "cache_read_input_token_cost": 5.5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.0000011, + "input_cost_per_token": 0.0000055, + "input_cost_per_token_above_200k_tokens": 0.000011, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "output_cost_per_token_above_200k_tokens": 0.00004125, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token": 0.000003, "input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", - "max_input_tokens": 1000000, + "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -536,22 +783,22 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 346 }, - "anthropic.claude-sonnet-4-5-20250929-v1:0": { + "global.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token": 0.000003, "input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -566,21 +813,171 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 346 }, - "anthropic.claude-v1": { - "input_cost_per_token": 0.000008, - "litellm_provider": "bedrock", - "max_input_tokens": 100000, - "max_output_tokens": 8191, - "max_tokens": 8191, + "us.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.000004125, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost": 3.3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000024 + "output_cost_per_token": 0.0000165, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, - "anthropic.claude-v2:1": { - "input_cost_per_token": 0.000008, - "litellm_provider": "bedrock", - "max_input_tokens": 100000, + "eu.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.000004125, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost": 3.3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "apac.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.000004125, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost": 3.3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-v1": { + "input_cost_per_token": 0.000008, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.000024 + }, + "anthropic.claude-v2:1": { + "input_cost_per_token": 0.000008, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, "max_output_tokens": 8191, "max_tokens": 8191, "mode": "chat", @@ -757,7 +1154,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "apac.anthropic.claude-3-5-sonnet-20241022-v2:0": { "cache_creation_input_token_cost": 0.00000375, @@ -790,7 +1189,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-8, + "cache_creation_input_token_cost": 3.125e-7 }, "apac.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 0.000001375, @@ -826,7 +1227,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "apac.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -899,6 +1302,9 @@ "supports_function_calling": true }, "azure_ai/claude-haiku-4-5": { + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000002, + "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 0.000001, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -916,7 +1322,58 @@ "supports_tool_choice": true, "supports_vision": true }, + "azure_ai/claude-opus-4-5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/claude-opus-4-6": { + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000025, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "azure_ai/claude-opus-4-1": { + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + "cache_read_input_token_cost": 0.0000015, "input_cost_per_token": 0.000015, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -935,6 +1392,9 @@ "supports_vision": true }, "azure_ai/claude-sonnet-4-5": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, + "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "litellm_provider": "azure_ai", "max_input_tokens": 200000, @@ -952,6 +1412,28 @@ "supports_tool_choice": true, "supports_vision": true }, + "azure_ai/claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "azure/computer-use-preview": { "input_cost_per_token": 0.000003, "litellm_provider": "azure", @@ -984,6 +1466,28 @@ "litellm_provider": "azure", "mode": "chat" }, + "azure_ai/gpt-oss-120b": { + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "azure_ai/model_router": { + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0, + "litellm_provider": "azure_ai", + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-services/", + "comment": "Flat cost of $0.14 per M input tokens for Azure AI Foundry Model Router infrastructure. Use pattern: azure_ai/model_router/ where deployment-name is your Azure deployment (e.g., azure-model-router)" + }, "azure/eu/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", "cache_read_input_token_cost": 0.000001375, @@ -1199,7 +1703,7 @@ "cache_read_input_token_cost": 1.4e-7, "input_cost_per_token": 0.00000138, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -1439,7 +1943,7 @@ "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -1522,7 +2026,7 @@ "litellm_provider": "azure", "max_input_tokens": 4097, "max_output_tokens": 4096, - "max_tokens": 4097, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000002, "supports_function_calling": true, @@ -1535,7 +2039,7 @@ "litellm_provider": "azure", "max_input_tokens": 4097, "max_output_tokens": 4096, - "max_tokens": 4097, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000002, "supports_function_calling": true, @@ -1991,16 +2495,16 @@ "supports_tool_choice": true, "supports_vision": false }, - "azure/gpt-audio-mini-2025-10-06": { - "input_cost_per_audio_token": 0.00001, - "input_cost_per_token": 6e-7, + "azure/gpt-audio-1.5-2026-02-23": { + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 0.00002, - "output_cost_per_token": 0.0000024, + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, "supported_endpoints": [ "/v1/chat/completions" ], @@ -2022,17 +2526,48 @@ "supports_tool_choice": true, "supports_vision": false }, - "azure/gpt-4o-audio-preview-2024-12-17": { - "input_cost_per_audio_token": 0.00004, - "input_cost_per_token": 0.0000025, + "azure/gpt-audio-mini-2025-10-06": { + "input_cost_per_audio_token": 0.00001, + "input_cost_per_token": 6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 0.00008, - "output_cost_per_token": 0.00001, - "supported_endpoints": [ + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-4o-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 0.00004, + "input_cost_per_token": 0.0000025, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 0.00008, + "output_cost_per_token": 0.00001, + "supported_endpoints": [ "/v1/chat/completions" ], "supported_modalities": [ @@ -2167,6 +2702,38 @@ "supports_system_messages": true, "supports_tool_choice": true }, + "azure/gpt-realtime-1.5-2026-02-23": { + "cache_creation_input_audio_token_cost": 0.000004, + "cache_read_input_token_cost": 0.000004, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.000016, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "azure/gpt-realtime-mini-2025-10-06": { "cache_creation_input_audio_token_cost": 3e-7, "cache_read_input_token_cost": 6e-8, @@ -2383,9 +2950,9 @@ "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, "litellm_provider": "azure", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", @@ -2409,7 +2976,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": false, + "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-5-chat-latest": { @@ -2441,7 +3008,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": false, + "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-5-mini": { @@ -2609,7 +3176,7 @@ "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -2642,7 +3209,7 @@ "cache_read_input_token_cost": 1.75e-7, "input_cost_per_token": 0.00000175, "litellm_provider": "azure", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -2677,7 +3244,7 @@ "input_cost_per_token": 0.00000175, "input_cost_per_token_priority": 0.0000035, "litellm_provider": "azure", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -2708,6 +3275,40 @@ "supports_service_tier": true, "supports_vision": true }, + "azure/gpt-5.2-chat": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 0.00000175, + "input_cost_per_token_priority": 0.0000035, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.000014, + "output_cost_per_token_priority": 0.000028, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, "azure/gpt-5.2-chat-2025-12-11": { "cache_read_input_token_cost": 1.75e-7, "cache_read_input_token_cost_priority": 3.5e-7, @@ -3335,7 +3936,7 @@ "cache_read_input_token_cost": 1.4e-7, "input_cost_per_token": 0.00000138, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -3766,7 +4367,7 @@ "litellm_provider": "azure_ai", "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 8192, + "max_tokens": 163840, "mode": "chat", "output_cost_per_token": 0.00000168, "supports_assistant_prefill": true, @@ -3780,7 +4381,7 @@ "litellm_provider": "azure_ai", "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 8192, + "max_tokens": 163840, "mode": "chat", "output_cost_per_token": 0.00000168, "supports_assistant_prefill": true, @@ -3854,28 +4455,28 @@ "supports_web_search": true }, "azure_ai/grok-3": { - "input_cost_per_token": 0.0000033, + "input_cost_per_token": 0.000003, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0000165, - "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", + "output_cost_per_token": 0.000015, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", "supports_function_calling": true, "supports_response_schema": false, "supports_tool_choice": true, "supports_web_search": true }, "azure_ai/grok-3-mini": { - "input_cost_per_token": 2.75e-7, + "input_cost_per_token": 2.5e-7, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.00000138, - "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", + "output_cost_per_token": 0.00000127, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", "supports_function_calling": true, "supports_reasoning": true, "supports_response_schema": false, @@ -3883,22 +4484,22 @@ "supports_web_search": true }, "azure_ai/grok-4": { - "input_cost_per_token": 0.0000055, + "input_cost_per_token": 0.000003, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0000275, - "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", + "output_cost_per_token": 0.000015, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_web_search": true }, "azure_ai/grok-4-fast-non-reasoning": { - "input_cost_per_token": 4.3e-7, - "output_cost_per_token": 0.00000173, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, @@ -3910,28 +4511,28 @@ "supports_web_search": true }, "azure_ai/grok-4-fast-reasoning": { - "input_cost_per_token": 4.3e-7, - "output_cost_per_token": 0.00000173, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/announcing-the-grok-4-fast-models-from-xai-now-available-in-azure-ai-foundry/4456701", + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_web_search": true }, "azure_ai/grok-code-fast-1": { - "input_cost_per_token": 0.0000035, + "input_cost_per_token": 2e-7, "litellm_provider": "azure_ai", "max_input_tokens": 131072, "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0000175, - "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", + "output_cost_per_token": 0.0000015, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, @@ -3957,6 +4558,20 @@ "output_cost_per_token": 7e-7, "supports_tool_choice": true }, + "azure_ai/kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/kimi-k2-5-now-in-microsoft-foundry/4492321", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, "azure_ai/ministral-3b": { "input_cost_per_token": 4e-8, "litellm_provider": "azure_ai", @@ -4052,13 +4667,13 @@ "supports_tool_choice": true }, "azure_ai/mistral-small-2503": { - "input_cost_per_token": 0.000001, + "input_cost_per_token": 1e-7, "litellm_provider": "azure_ai", "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 0.000003, + "output_cost_per_token": 3e-7, "supports_function_calling": true, "supports_tool_choice": true, "supports_vision": true @@ -4191,6 +4806,97 @@ "output_cost_per_token": 0.000024, "supports_tool_choice": true }, + "bedrock/ap-northeast-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-northeast-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-northeast-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.00000303, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/ap-northeast-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-northeast-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.00000303, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.00000303, + "source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000318, "litellm_provider": "bedrock", @@ -4209,33 +4915,190 @@ "mode": "chat", "output_cost_per_token": 7.2e-7 }, - "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 0.00000305, + "bedrock/ap-south-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, "litellm_provider": "bedrock", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 0.00000403 + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" }, - "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { - "input_cost_per_token": 3.5e-7, + "bedrock/ap-south-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, "litellm_provider": "bedrock", - "max_input_tokens": 8192, + "max_input_tokens": 196000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 6.9e-7 + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" }, - "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { - "input_cost_per_second": 0.01635, + "bedrock/ap-south-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.1e-7, "litellm_provider": "bedrock", - "max_input_tokens": 100000, - "max_output_tokens": 8191, - "max_tokens": 8191, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", - "output_cost_per_second": 0.01635, - "supports_tool_choice": true + "output_cost_per_token": 0.00000294, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/ap-south-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-south-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 0.00000305, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000403 + }, + "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6.9e-7 + }, + "bedrock/eu-north-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-north-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-north-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.01635, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.01635, + "supports_tool_choice": true }, "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { "input_cost_per_second": 0.0415, @@ -4314,6 +5177,32 @@ "output_cost_per_token": 0.000024, "supports_tool_choice": true }, + "bedrock/eu-central-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-central-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000286, "litellm_provider": "bedrock", @@ -4332,6 +5221,32 @@ "mode": "chat", "output_cost_per_token": 6.5e-7 }, + "bedrock/eu-west-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000345, "litellm_provider": "bedrock", @@ -4350,6 +5265,32 @@ "mode": "chat", "output_cost_per_token": 7.8e-7 }, + "bedrock/eu-west-2/minimax.minimax-m2.1": { + "input_cost_per_token": 4.7e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000186, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 7.8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000186, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2": { "input_cost_per_token": 2e-7, "litellm_provider": "bedrock", @@ -4380,6 +5321,32 @@ "output_cost_per_token": 9.1e-7, "supports_tool_choice": true }, + "bedrock/eu-south-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-south-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", @@ -4394,7 +5361,9 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000445, @@ -4414,6 +5383,70 @@ "mode": "chat", "output_cost_per_token": 0.00000101 }, + "bedrock/sa-east-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/sa-east-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/sa-east-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.00000303, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/sa-east-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/sa-east-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { "input_cost_per_second": 0.011, "litellm_provider": "bedrock", @@ -4550,19 +5583,147 @@ "output_cost_per_token": 7e-7, "supports_tool_choice": true }, - "bedrock/us-gov-east-1/amazon.nova-pro-v1:0": { - "input_cost_per_token": 9.6e-7, + "bedrock/us-east-1/deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, "litellm_provider": "bedrock", - "max_input_tokens": 300000, - "max_output_tokens": 10000, - "max_tokens": 10000, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 0.00000384, + "output_cost_per_token": 0.00000185, "supports_function_calling": true, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_vision": true + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/us-east-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/us-east-2/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-gov-east-1/amazon.nova-pro-v1:0": { + "input_cost_per_token": 9.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 0.00000384, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true }, "bedrock/us-gov-east-1/amazon.titan-text-express-v1": { "input_cost_per_token": 0.0000013, @@ -4603,7 +5764,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3.6e-7, + "cache_creation_input_token_cost": 0.0000045 }, "bedrock/us-gov-east-1/anthropic.claude-3-haiku-20240307-v1:0": { "input_cost_per_token": 3e-7, @@ -4617,7 +5780,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7 }, "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": { "input_cost_per_token": 0.0000033, @@ -4635,7 +5800,9 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3.3e-7, + "cache_creation_input_token_cost": 0.000004125 }, "bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000265, @@ -4730,7 +5897,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3.6e-7, + "cache_creation_input_token_cost": 0.0000045 }, "bedrock/us-gov-west-1/anthropic.claude-3-haiku-20240307-v1:0": { "input_cost_per_token": 3e-7, @@ -4744,7 +5913,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7 }, "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": { "input_cost_per_token": 0.0000033, @@ -4762,7 +5933,9 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3.3e-7, + "cache_creation_input_token_cost": 0.000004125 }, "bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000265, @@ -4920,6 +6093,70 @@ "output_cost_per_token": 7e-7, "supports_tool_choice": true }, + "bedrock/us-west-2/deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/us-west-2/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": { "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, @@ -4971,13 +6208,13 @@ "supports_tool_choice": true }, "cerebras/gpt-oss-120b": { - "input_cost_per_token": 2.5e-7, + "input_cost_per_token": 3.5e-7, "litellm_provider": "cerebras", "max_input_tokens": 131072, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6.9e-7, + "output_cost_per_token": 7.5e-7, "source": "https://www.cerebras.ai/blog/openai-gpt-oss-120b-runs-fastest-on-cerebras", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -4995,9 +6232,24 @@ "output_cost_per_token": 8e-7, "source": "https://inference-docs.cerebras.ai/support/pricing", "supports_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true }, "cerebras/zai-glm-4.6": { + "deprecation_date": "2026-01-20", + "input_cost_per_token": 0.00000225, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00000275, + "source": "https://www.cerebras.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "cerebras/zai-glm-4.7": { "input_cost_per_token": 0.00000225, "litellm_provider": "cerebras", "max_input_tokens": 128000, @@ -5426,7 +6678,7 @@ "litellm_provider": "anthropic", "max_input_tokens": 1000000, "max_output_tokens": 64000, - "max_tokens": 1000000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, "output_cost_per_token_above_200k_tokens": 0.0000225, @@ -5444,6 +6696,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, + "supports_web_search": true, "tool_use_system_prompt_tokens": 159 }, "claude-sonnet-4-5": { @@ -5507,6 +6760,36 @@ "supports_web_search": true, "tool_use_system_prompt_tokens": 346 }, + "claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, @@ -5669,28 +6952,27 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "claude-sonnet-4-20250514": { - "deprecation_date": "2026-05-14", - "cache_creation_input_token_cost": 0.00000375, - "cache_creation_input_token_cost_above_1hr": 0.000006, - "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 0.000003, - "input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "claude-opus-4-6": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, "litellm_provider": "anthropic", "max_input_tokens": 1000000, - "max_output_tokens": 64000, - "max_tokens": 64000, + "max_output_tokens": 128000, + "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 0.000015, + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, "search_context_size_medium": 0.01 }, - "supports_assistant_prefill": true, + "supports_assistant_prefill": false, "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -5699,13 +6981,84 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 346, + "provider_specific_entry": { + "us": 1.1, + "fast": 6 + } }, - "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { - "input_cost_per_token": 0.000001923, - "litellm_provider": "cloudflare", - "max_input_tokens": 3072, - "max_output_tokens": 3072, + "claude-opus-4-6-20260205": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "provider_specific_entry": { + "us": 1.1, + "fast": 6 + } + }, + "claude-sonnet-4-20250514": { + "deprecation_date": "2026-05-14", + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { + "input_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "max_input_tokens": 3072, + "max_output_tokens": 3072, "max_tokens": 3072, "mode": "chat", "output_cost_per_token": 0.000001923 @@ -5997,14 +7350,14 @@ "supports_vision": true }, "deepseek-chat": { - "cache_read_input_token_cost": 3e-8, - "input_cost_per_token": 3e-7, + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.8e-7, "litellm_provider": "deepseek", - "max_input_tokens": 128000, + "max_input_tokens": 131072, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-7, + "output_cost_per_token": 4.2e-7, "source": "https://api-docs.deepseek.com/quick_start/pricing", "supported_endpoints": [ "/v1/chat/completions" @@ -6018,14 +7371,14 @@ "supports_tool_choice": true }, "deepseek-reasoner": { - "cache_read_input_token_cost": 3e-8, - "input_cost_per_token": 3e-7, + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.8e-7, "litellm_provider": "deepseek", - "max_input_tokens": 128000, + "max_input_tokens": 131072, "max_output_tokens": 65536, - "max_tokens": 128000, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 5e-7, + "output_cost_per_token": 4.2e-7, "source": "https://api-docs.deepseek.com/quick_start/pricing", "supported_endpoints": [ "/v1/chat/completions" @@ -6044,7 +7397,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 1000000, "max_output_tokens": 16384, - "max_tokens": 1000000, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.0000015, "source": "https://www.alibabacloud.com/help/en/model-studio/models", @@ -6056,7 +7409,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 32768, - "max_tokens": 1000000, + "max_tokens": 32768, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6085,7 +7438,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 32768, - "max_tokens": 1000000, + "max_tokens": 32768, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6115,7 +7468,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 30720, "max_output_tokens": 8192, - "max_tokens": 32768, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 0.0000064, "source": "https://www.alibabacloud.com/help/en/model-studio/models", @@ -6128,7 +7481,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 129024, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", @@ -6141,7 +7494,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 129024, "max_output_tokens": 8192, - "max_tokens": 131072, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 0.0000012, "source": "https://www.alibabacloud.com/help/en/model-studio/models", @@ -6154,7 +7507,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 129024, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, @@ -6168,7 +7521,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 129024, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, @@ -6181,7 +7534,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 32768, - "max_tokens": 1000000, + "max_tokens": 32768, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6212,7 +7565,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 32768, - "max_tokens": 1000000, + "max_tokens": 32768, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6243,7 +7596,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 32768, - "max_tokens": 1000000, + "max_tokens": 32768, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6275,7 +7628,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 129024, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", "output_cost_per_reasoning_token": 5e-7, "output_cost_per_token": 2e-7, @@ -6289,7 +7642,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 1000000, "max_output_tokens": 8192, - "max_tokens": 1000000, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 2e-7, "source": "https://www.alibabacloud.com/help/en/model-studio/models", @@ -6302,7 +7655,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 1000000, "max_output_tokens": 16384, - "max_tokens": 1000000, + "max_tokens": 16384, "mode": "chat", "output_cost_per_reasoning_token": 5e-7, "output_cost_per_token": 2e-7, @@ -6316,7 +7669,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 1000000, "max_output_tokens": 16384, - "max_tokens": 1000000, + "max_tokens": 16384, "mode": "chat", "output_cost_per_reasoning_token": 5e-7, "output_cost_per_token": 2e-7, @@ -6329,7 +7682,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 129024, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6340,7 +7693,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 65536, - "max_tokens": 1000000, + "max_tokens": 65536, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6389,7 +7742,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 65536, - "max_tokens": 1000000, + "max_tokens": 65536, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6434,7 +7787,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 65536, - "max_tokens": 1000000, + "max_tokens": 65536, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6483,7 +7836,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 997952, "max_output_tokens": 65536, - "max_tokens": 1000000, + "max_tokens": 65536, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6528,7 +7881,7 @@ "litellm_provider": "dashscope", "max_input_tokens": 258048, "max_output_tokens": 65536, - "max_tokens": 262144, + "max_tokens": 65536, "mode": "chat", "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, @@ -6561,88 +7914,309 @@ } ] }, - "dashscope/qwq-plus": { - "input_cost_per_token": 8e-7, + "dashscope/qwen3-max": { "litellm_provider": "dashscope", - "max_input_tokens": 98304, - "max_output_tokens": 8192, - "max_tokens": 131072, + "max_input_tokens": 258048, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.0000024, "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.000006, + "range": [ + 0, + 32000 + ] + }, + { + "input_cost_per_token": 0.0000024, + "output_cost_per_token": 0.000012, + "range": [ + 32000, + 128000 + ] + }, + { + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [ + 128000, + 252000 + ] + } + ] }, - "databricks/databricks-claude-3-7-sonnet": { - "input_cost_per_token": 0.0000029999900000000002, - "input_dbu_cost_per_token": 0.000042857, - "litellm_provider": "databricks", - "max_input_tokens": 200000, - "max_output_tokens": 128000, - "max_tokens": 200000, - "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." - }, + "dashscope/qwen3-max-2026-01-23": { + "litellm_provider": "dashscope", + "max_input_tokens": 258048, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.000015000020000000002, - "output_dbu_cost_per_token": 0.000214286, - "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", - "supports_assistant_prefill": true, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.000006, + "range": [ + 0, + 32000 + ] + }, + { + "input_cost_per_token": 0.0000024, + "output_cost_per_token": 0.000012, + "range": [ + 32000, + 128000 + ] + }, + { + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [ + 128000, + 252000 + ] + } + ] }, - "databricks/databricks-claude-haiku-4-5": { - "input_cost_per_token": 0.00000100002, - "input_dbu_cost_per_token": 0.000014286, - "litellm_provider": "databricks", - "max_input_tokens": 200000, - "max_output_tokens": 64000, - "max_tokens": 200000, - "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." - }, + "dashscope/qwen3-next-80b-a3b-instruct": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.00000500003, - "output_dbu_cost_per_token": 0.000071429, - "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", - "supports_assistant_prefill": true, + "output_cost_per_token": 0.0000012, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", "supports_function_calling": true, - "supports_reasoning": true, "supports_tool_choice": true }, - "databricks/databricks-claude-opus-4": { - "input_cost_per_token": 0.000015000020000000002, - "input_dbu_cost_per_token": 0.000214286, - "litellm_provider": "databricks", - "max_input_tokens": 200000, - "max_output_tokens": 32000, - "max_tokens": 200000, - "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." - }, + "dashscope/qwen3-next-80b-a3b-thinking": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.00007500003000000001, - "output_dbu_cost_per_token": 0.001071429, - "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", - "supports_assistant_prefill": true, + "output_cost_per_token": 0.0000012, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true }, - "databricks/databricks-claude-opus-4-1": { - "input_cost_per_token": 0.000015000020000000002, - "input_dbu_cost_per_token": 0.000214286, - "litellm_provider": "databricks", - "max_input_tokens": 200000, - "max_output_tokens": 32000, - "max_tokens": 200000, - "metadata": { - "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." - }, - "mode": "chat", - "output_cost_per_token": 0.00007500003000000001, + "dashscope/qwen3-vl-235b-a22b-instruct": { + "input_cost_per_token": 4e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0000016, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "dashscope/qwen3-vl-235b-a22b-thinking": { + "input_cost_per_token": 4e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.000004, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "dashscope/qwen3-vl-32b-instruct": { + "input_cost_per_token": 1.6e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6.4e-7, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "dashscope/qwen3-vl-32b-thinking": { + "input_cost_per_token": 1.6e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.00000287, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "dashscope/qwen3-vl-plus": { + "litellm_provider": "dashscope", + "max_input_tokens": 260096, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.0000016, + "range": [ + 0, + 32000 + ] + }, + { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000024, + "range": [ + 32000, + 128000 + ] + }, + { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000048, + "range": [ + 128000, + 256000 + ] + } + ] + }, + "dashscope/qwen3.5-plus": { + "litellm_provider": "dashscope", + "max_input_tokens": 991808, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000024, + "range": [ + 0, + 256000 + ] + }, + { + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.000003, + "range": [ + 256000, + 1000000 + ] + } + ] + }, + "dashscope/qwq-plus": { + "input_cost_per_token": 8e-7, + "litellm_provider": "dashscope", + "max_input_tokens": 98304, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000024, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-3-7-sonnet": { + "input_cost_per_token": 0.0000029999900000000002, + "input_dbu_cost_per_token": 0.000042857, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.000015000020000000002, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-haiku-4-5": { + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.00000500003, + "output_dbu_cost_per_token": 0.000071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4": { + "input_cost_per_token": 0.000015000020000000002, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.00007500003000000001, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-1": { + "input_cost_per_token": 0.000015000020000000002, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.00007500003000000001, "output_dbu_cost_per_token": 0.001071429, "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", "supports_assistant_prefill": true, @@ -6656,7 +8230,7 @@ "litellm_provider": "databricks", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6675,7 +8249,7 @@ "litellm_provider": "databricks", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6694,7 +8268,7 @@ "litellm_provider": "databricks", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6713,7 +8287,7 @@ "litellm_provider": "databricks", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6732,7 +8306,7 @@ "litellm_provider": "databricks", "max_input_tokens": 1048576, "max_output_tokens": 65535, - "max_tokens": 1048576, + "max_tokens": 65535, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6749,7 +8323,7 @@ "litellm_provider": "databricks", "max_input_tokens": 1048576, "max_output_tokens": 65536, - "max_tokens": 1048576, + "max_tokens": 65536, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6766,7 +8340,7 @@ "litellm_provider": "databricks", "max_input_tokens": 128000, "max_output_tokens": 32000, - "max_tokens": 128000, + "max_tokens": 32000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6779,9 +8353,9 @@ "input_cost_per_token": 0.00000124999, "input_dbu_cost_per_token": 0.000017857, "litellm_provider": "databricks", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, - "max_tokens": 400000, + "max_tokens": 128000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6794,9 +8368,9 @@ "input_cost_per_token": 0.00000124999, "input_dbu_cost_per_token": 0.000017857, "litellm_provider": "databricks", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, - "max_tokens": 400000, + "max_tokens": 128000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6809,9 +8383,9 @@ "input_cost_per_token": 2.4997000000000006e-7, "input_dbu_cost_per_token": 0.000003571, "litellm_provider": "databricks", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, - "max_tokens": 400000, + "max_tokens": 128000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6824,9 +8398,9 @@ "input_cost_per_token": 4.998e-8, "input_dbu_cost_per_token": 7.14e-7, "litellm_provider": "databricks", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, - "max_tokens": 400000, + "max_tokens": 128000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -6919,7 +8493,7 @@ "litellm_provider": "databricks", "max_input_tokens": 200000, "max_output_tokens": 128000, - "max_tokens": 200000, + "max_tokens": 128000, "metadata": { "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." }, @@ -7016,7 +8590,8 @@ "output_cost_per_token": 9e-8, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": { "max_tokens": 131072, @@ -7026,7 +8601,8 @@ "output_cost_per_token": 0.000001, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": { "max_tokens": 131072, @@ -7046,7 +8622,8 @@ "output_cost_per_token": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen2.5-72B-Instruct": { "max_tokens": 32768, @@ -7056,7 +8633,8 @@ "output_cost_per_token": 3.9e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen2.5-7B-Instruct": { "max_tokens": 32768, @@ -7077,7 +8655,8 @@ "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-14B": { "max_tokens": 40960, @@ -7087,7 +8666,8 @@ "output_cost_per_token": 2.4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-235B-A22B": { "max_tokens": 40960, @@ -7097,7 +8677,8 @@ "output_cost_per_token": 5.4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": { "max_tokens": 262144, @@ -7107,7 +8688,8 @@ "output_cost_per_token": 6e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": { "max_tokens": 262144, @@ -7117,7 +8699,8 @@ "output_cost_per_token": 0.0000029, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-30B-A3B": { "max_tokens": 40960, @@ -7127,7 +8710,8 @@ "output_cost_per_token": 2.9e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-32B": { "max_tokens": 40960, @@ -7137,7 +8721,8 @@ "output_cost_per_token": 2.8e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": { "max_tokens": 262144, @@ -7147,7 +8732,8 @@ "output_cost_per_token": 0.0000016, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": { "max_tokens": 262144, @@ -7157,7 +8743,8 @@ "output_cost_per_token": 0.0000012, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { "max_tokens": 262144, @@ -7167,7 +8754,8 @@ "output_cost_per_token": 0.0000014, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { "max_tokens": 262144, @@ -7177,8 +8765,9 @@ "output_cost_per_token": 0.0000014, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true - }, + "supports_tool_choice": true, + "supports_function_calling": true + }, "deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": { "max_tokens": 8192, "max_input_tokens": 8192, @@ -7228,7 +8817,8 @@ "cache_read_input_token_cost": 3.3e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/anthropic/claude-4-opus": { "max_tokens": 200000, @@ -7238,7 +8828,8 @@ "output_cost_per_token": 0.0000825, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/anthropic/claude-4-sonnet": { "max_tokens": 200000, @@ -7248,7 +8839,8 @@ "output_cost_per_token": 0.0000165, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-R1": { "max_tokens": 163840, @@ -7258,7 +8850,8 @@ "output_cost_per_token": 0.0000024, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-R1-0528": { "max_tokens": 163840, @@ -7269,7 +8862,8 @@ "cache_read_input_token_cost": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": { "max_tokens": 32768, @@ -7279,7 +8873,8 @@ "output_cost_per_token": 0.000003, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "max_tokens": 131072, @@ -7299,7 +8894,8 @@ "output_cost_per_token": 2.7e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-R1-Turbo": { "max_tokens": 40960, @@ -7309,7 +8905,8 @@ "output_cost_per_token": 0.000003, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-V3": { "max_tokens": 163840, @@ -7319,7 +8916,8 @@ "output_cost_per_token": 8.9e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-V3-0324": { "max_tokens": 163840, @@ -7329,7 +8927,8 @@ "output_cost_per_token": 8.8e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-V3.1": { "max_tokens": 163840, @@ -7341,7 +8940,8 @@ "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true, - "supports_reasoning": true + "supports_reasoning": true, + "supports_function_calling": true }, "deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": { "max_tokens": 163840, @@ -7352,9 +8952,11 @@ "cache_read_input_token_cost": 2.16e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/google/gemini-2.0-flash-001": { + "deprecation_date": "2026-06-01", "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, @@ -7362,7 +8964,8 @@ "output_cost_per_token": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/google/gemini-2.5-flash": { "max_tokens": 1000000, @@ -7372,7 +8975,8 @@ "output_cost_per_token": 0.0000025, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/google/gemini-2.5-pro": { "max_tokens": 1000000, @@ -7382,7 +8986,8 @@ "output_cost_per_token": 0.00001, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/google/gemma-3-12b-it": { "max_tokens": 131072, @@ -7392,7 +8997,8 @@ "output_cost_per_token": 1e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/google/gemma-3-27b-it": { "max_tokens": 131072, @@ -7402,7 +9008,8 @@ "output_cost_per_token": 1.6e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/google/gemma-3-4b-it": { "max_tokens": 131072, @@ -7412,7 +9019,8 @@ "output_cost_per_token": 8e-8, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": { "max_tokens": 131072, @@ -7432,7 +9040,8 @@ "output_cost_per_token": 2e-8, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Llama-3.3-70B-Instruct": { "max_tokens": 131072, @@ -7442,7 +9051,8 @@ "output_cost_per_token": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": { "max_tokens": 131072, @@ -7452,6 +9062,7 @@ "output_cost_per_token": 3.9e-7, "litellm_provider": "deepinfra", "mode": "chat", + "supports_function_calling": true, "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { @@ -7462,7 +9073,8 @@ "output_cost_per_token": 6e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": { "max_tokens": 327680, @@ -7472,7 +9084,8 @@ "output_cost_per_token": 3e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Llama-Guard-3-8B": { "max_tokens": 131072, @@ -7502,7 +9115,8 @@ "output_cost_per_token": 6e-8, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": { "max_tokens": 131072, @@ -7512,7 +9126,8 @@ "output_cost_per_token": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { "max_tokens": 131072, @@ -7522,7 +9137,8 @@ "output_cost_per_token": 2.8e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": { "max_tokens": 131072, @@ -7532,7 +9148,8 @@ "output_cost_per_token": 5e-8, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { "max_tokens": 131072, @@ -7542,7 +9159,8 @@ "output_cost_per_token": 3e-8, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/microsoft/WizardLM-2-8x22B": { "max_tokens": 65536, @@ -7562,7 +9180,8 @@ "output_cost_per_token": 1.4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/mistralai/Mistral-Nemo-Instruct-2407": { "max_tokens": 131072, @@ -7572,7 +9191,8 @@ "output_cost_per_token": 4e-8, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": { "max_tokens": 32768, @@ -7582,7 +9202,8 @@ "output_cost_per_token": 8e-8, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": { "max_tokens": 128000, @@ -7592,7 +9213,8 @@ "output_cost_per_token": 2e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { "max_tokens": 32768, @@ -7602,7 +9224,8 @@ "output_cost_per_token": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/moonshotai/Kimi-K2-Instruct": { "max_tokens": 131072, @@ -7612,7 +9235,8 @@ "output_cost_per_token": 0.000002, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/moonshotai/Kimi-K2-Instruct-0905": { "max_tokens": 262144, @@ -7623,7 +9247,8 @@ "cache_read_input_token_cost": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { "max_tokens": 131072, @@ -7633,7 +9258,8 @@ "output_cost_per_token": 6e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { "max_tokens": 131072, @@ -7643,7 +9269,8 @@ "output_cost_per_token": 4e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { "max_tokens": 131072, @@ -7653,7 +9280,8 @@ "output_cost_per_token": 1.6e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/openai/gpt-oss-120b": { "max_tokens": 131072, @@ -7663,7 +9291,8 @@ "output_cost_per_token": 4.5e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/openai/gpt-oss-20b": { "max_tokens": 131072, @@ -7673,7 +9302,8 @@ "output_cost_per_token": 1.5e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepinfra/zai-org/GLM-4.5": { "max_tokens": 131072, @@ -7683,21 +9313,25 @@ "output_cost_per_token": 0.0000016, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "deepseek/deepseek-chat": { - "cache_read_input_token_cost": 3e-8, - "input_cost_per_token": 3e-7, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, "litellm_provider": "deepseek", - "max_input_tokens": 128000, + "max_input_tokens": 131072, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 5e-7, + "output_cost_per_token": 4.2e-7, "source": "https://api-docs.deepseek.com/quick_start/pricing", "supported_endpoints": [ "/v1/chat/completions" ], + "supports_assistant_prefill": true, "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -7736,19 +9370,28 @@ "supports_tool_choice": true }, "deepseek/deepseek-reasoner": { - "input_cost_per_token": 5.5e-7, - "input_cost_per_token_cache_hit": 1.4e-7, + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.8e-7, + "input_cost_per_token_cache_hit": 2.8e-8, "litellm_provider": "deepseek", - "max_input_tokens": 65536, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.00000219, + "output_cost_per_token": 4.2e-7, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": [ + "/v1/chat/completions" + ], "supports_assistant_prefill": true, - "supports_function_calling": true, + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, "supports_prompt_caching": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false }, "deepseek/deepseek-v3": { "cache_creation_input_token_cost": 0, @@ -7772,7 +9415,7 @@ "litellm_provider": "deepseek", "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 8192, + "max_tokens": 163840, "mode": "chat", "output_cost_per_token": 4e-7, "supports_assistant_prefill": true, @@ -7786,13 +9429,68 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 163840, "max_output_tokens": 81920, - "max_tokens": 163840, + "max_tokens": 81920, "mode": "chat", "output_cost_per_token": 0.00000168, "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true }, + "deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "deepseek-v3-2-251201": { + "input_cost_per_token": 0, + "litellm_provider": "volcengine", + "max_input_tokens": 98304, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "glm-4-7-251222": { + "input_cost_per_token": 0, + "litellm_provider": "volcengine", + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "kimi-k2-thinking-251104": { + "input_cost_per_token": 0, + "litellm_provider": "volcengine", + "max_input_tokens": 229376, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "eu.amazon.nova-lite-v1:0": { "input_cost_per_token": 7.8e-8, "litellm_provider": "bedrock_converse", @@ -7847,7 +9545,9 @@ "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "cache_read_input_token_cost": 2.5e-8, + "cache_creation_input_token_cost": 3.125e-7 }, "eu.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 0.000001375, @@ -7884,7 +9584,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { "input_cost_per_token": 0.000003, @@ -7901,7 +9603,9 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "eu.anthropic.claude-3-7-sonnet-20250219-v1:0": { "input_cost_per_token": 0.000003, @@ -7919,7 +9623,9 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "eu.anthropic.claude-3-haiku-20240307-v1:0": { "input_cost_per_token": 2.5e-7, @@ -7933,7 +9639,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-8, + "cache_creation_input_token_cost": 3.125e-7 }, "eu.anthropic.claude-3-opus-20240229-v1:0": { "input_cost_per_token": 0.000015, @@ -7946,7 +9654,9 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 0.0000015, + "cache_creation_input_token_cost": 0.00001875 }, "eu.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 0.000003, @@ -7960,7 +9670,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "eu.anthropic.claude-opus-4-1-20250805-v1:0": { "cache_creation_input_token_cost": 0.00001875, @@ -8079,7 +9791,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 1.3e-7, "supports_function_calling": true, @@ -8090,7 +9802,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 1.9e-7, "supports_function_calling": true, @@ -8101,7 +9813,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000006, "supports_function_calling": true, @@ -8111,14 +9823,14 @@ "litellm_provider": "featherless_ai", "max_input_tokens": 32768, "max_output_tokens": 4096, - "max_tokens": 32768, + "max_tokens": 4096, "mode": "chat" }, "featherless_ai/featherless-ai/Qwerky-QwQ-32B": { "litellm_provider": "featherless_ai", "max_input_tokens": 32768, "max_output_tokens": 4096, - "max_tokens": 32768, + "max_tokens": 4096, "mode": "chat" }, "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { @@ -8221,13 +9933,13 @@ "supports_tool_choice": true }, "fireworks_ai/accounts/fireworks/models/deepseek-v3p2": { - "input_cost_per_token": 0.0000012, + "input_cost_per_token": 5.6e-7, "litellm_provider": "fireworks_ai", "max_input_tokens": 163840, "max_output_tokens": 163840, "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 0.0000012, + "output_cost_per_token": 0.00000168, "source": "https://fireworks.ai/models/fireworks/deepseek-v3p2", "supports_function_calling": true, "supports_reasoning": true, @@ -8289,6 +10001,21 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/glm-4p7": { + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 0.0000022, + "source": "https://fireworks.ai/models/fireworks/glm-4p7", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": { "input_cost_per_token": 1.5e-7, "litellm_provider": "fireworks_ai", @@ -8322,7 +10049,7 @@ "litellm_provider": "fireworks_ai", "max_input_tokens": 131072, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.0000025, "source": "https://fireworks.ai/models/fireworks/kimi-k2-instruct", @@ -8335,7 +10062,7 @@ "litellm_provider": "fireworks_ai", "max_input_tokens": 262144, "max_output_tokens": 32768, - "max_tokens": 262144, + "max_tokens": 32768, "mode": "chat", "output_cost_per_token": 0.0000025, "source": "https://app.fireworks.ai/models/fireworks/kimi-k2-instruct-0905", @@ -8357,6 +10084,20 @@ "supports_tool_choice": true, "supports_web_search": true }, + "fireworks_ai/accounts/fireworks/models/kimi-k2p5": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": { "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", @@ -8460,6 +10201,20 @@ "supports_response_schema": true, "supports_tool_choice": false }, + "fireworks_ai/accounts/fireworks/models/minimax-m2p1": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token": 3e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 204800, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "source": "https://fireworks.ai/models/fireworks/minimax-m2p1", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { "input_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", @@ -8512,6 +10267,49 @@ "supports_response_schema": true, "supports_tool_choice": false }, + "fireworks_ai/glm-4p7": { + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 0.0000022, + "source": "https://fireworks.ai/models/fireworks/glm-4p7", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/kimi-k2p5": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/minimax-m2p1": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token": 3e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 204800, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "source": "https://fireworks.ai/models/fireworks/minimax-m2p1", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "friendliai/meta-llama-3.1-70b-instruct": { "input_cost_per_token": 6e-7, "litellm_provider": "friendliai", @@ -8825,7 +10623,7 @@ "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 8192, "max_output_tokens": 2048, - "max_tokens": 8192, + "max_tokens": 2048, "mode": "chat", "output_cost_per_character": 3.75e-7, "output_cost_per_token": 0.0000015, @@ -8842,7 +10640,7 @@ "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 8192, "max_output_tokens": 2048, - "max_tokens": 8192, + "max_tokens": 2048, "mode": "chat", "output_cost_per_character": 3.75e-7, "output_cost_per_token": 0.0000015, @@ -8852,6 +10650,7 @@ "supports_tool_choice": true }, "gemini-1.5-flash": { + "deprecation_date": "2025-09-29", "input_cost_per_audio_per_second": 0.000002, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, @@ -8956,6 +10755,7 @@ "supports_vision": true }, "gemini-1.5-flash-exp-0827": { + "deprecation_date": "2025-09-29", "input_cost_per_audio_per_second": 0.000002, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, @@ -8990,6 +10790,7 @@ "supports_vision": true }, "gemini-1.5-flash-preview-0514": { + "deprecation_date": "2025-09-29", "input_cost_per_audio_per_second": 0.000002, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, "input_cost_per_character": 1.875e-8, @@ -9023,6 +10824,7 @@ "supports_vision": true }, "gemini-1.5-pro": { + "deprecation_date": "2025-09-29", "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, @@ -9110,6 +10912,7 @@ "supports_vision": true }, "gemini-1.5-pro-preview-0215": { + "deprecation_date": "2025-09-29", "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, @@ -9137,6 +10940,7 @@ "supports_tool_choice": true }, "gemini-1.5-pro-preview-0409": { + "deprecation_date": "2025-09-29", "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, @@ -9163,6 +10967,7 @@ "supports_tool_choice": true }, "gemini-1.5-pro-preview-0514": { + "deprecation_date": "2025-09-29", "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, "input_cost_per_character": 3.125e-7, @@ -9191,6 +10996,7 @@ }, "gemini-2.0-flash": { "cache_read_input_token_cost": 2.5e-8, + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, "litellm_provider": "vertex_ai-language-models", @@ -9230,7 +11036,7 @@ }, "gemini-2.0-flash-001": { "cache_read_input_token_cost": 3.75e-8, - "deprecation_date": "2026-02-05", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "vertex_ai-language-models", @@ -9316,6 +11122,7 @@ }, "gemini-2.0-flash-lite": { "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7.5e-8, "input_cost_per_token": 7.5e-8, "litellm_provider": "vertex_ai-language-models", @@ -9351,7 +11158,7 @@ }, "gemini-2.0-flash-lite-001": { "cache_read_input_token_cost": 1.875e-8, - "deprecation_date": "2026-02-25", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7.5e-8, "input_cost_per_token": 7.5e-8, "litellm_provider": "vertex_ai-language-models", @@ -9434,6 +11241,7 @@ "tpm": 250000 }, "gemini-2.0-flash-preview-image-generation": { + "deprecation_date": "2025-11-14", "cache_read_input_token_cost": 2.5e-8, "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, @@ -9472,6 +11280,7 @@ "supports_web_search": true }, "gemini-2.0-flash-thinking-exp": { + "deprecation_date": "2025-12-02", "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, @@ -9520,6 +11329,7 @@ "supports_web_search": true }, "gemini-2.0-flash-thinking-exp-01-21": { + "deprecation_date": "2025-12-02", "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, @@ -9656,9 +11466,60 @@ "supports_vision": true, "supports_web_search": true }, - "gemini-2.5-flash-lite": { + "gemini-3.1-flash-lite-preview": { "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_per_audio_token": 5e-8, "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "source": "https://ai.google.dev/gemini-api/docs/models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true + }, + "gemini-2.5-flash-lite": { + "cache_read_input_token_cost": 1e-8, + "input_cost_per_audio_token": 3e-7, "input_cost_per_token": 1e-7, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, @@ -9702,7 +11563,7 @@ "supports_web_search": true }, "gemini-2.5-flash-lite-preview-09-2025": { - "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost": 1e-8, "input_cost_per_audio_token": 3e-7, "input_cost_per_token": 1e-7, "litellm_provider": "vertex_ai-language-models", @@ -9884,6 +11745,7 @@ "tpm": 8000000 }, "gemini-2.5-flash-lite-preview-06-17": { + "deprecation_date": "2025-11-18", "cache_read_input_token_cost": 2.5e-8, "input_cost_per_audio_token": 5e-7, "input_cost_per_token": 1e-7, @@ -9973,6 +11835,7 @@ "supports_web_search": true }, "gemini-2.5-flash-preview-05-20": { + "deprecation_date": "2025-11-18", "cache_read_input_token_cost": 7.5e-8, "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, @@ -10019,6 +11882,7 @@ }, "gemini-2.5-pro": { "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_token": 0.00000125, "input_cost_per_token_above_200k_tokens": 0.0000025, @@ -10062,6 +11926,7 @@ "supports_web_search": true }, "gemini-3-pro-preview": { + "deprecation_date": "2026-03-26", "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, @@ -10107,30 +11972,39 @@ "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true, - "supports_web_search": true - }, - "vertex_ai/gemini-3-pro-preview": { + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "gemini-3.1-pro-preview": { "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_token": 0.000002, "input_cost_per_token_above_200k_tokens": 0.000004, "input_cost_per_token_batches": 0.000001, - "litellm_provider": "vertex_ai", + "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 65535, + "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 65535, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", "output_cost_per_token": 0.000012, "output_cost_per_token_above_200k_tokens": 0.000018, "output_cost_per_token_batches": 0.000006, - "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", "supported_endpoints": [ "/v1/chat/completions", "/v1/completions", @@ -10155,25 +12029,40 @@ "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true, - "supports_web_search": true - }, - "vertex_ai/gemini-3-flash-preview": { - "cache_read_input_token_cost": 5e-8, - "input_cost_per_token": 5e-7, - "input_cost_per_audio_token": 0.000001, - "litellm_provider": "vertex_ai", + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 65535, + "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 65535, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.000003, - "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", "supported_endpoints": [ "/v1/chat/completions", "/v1/completions", @@ -10198,13 +12087,18 @@ "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true, - "supports_web_search": true + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true }, - "gemini-2.5-pro-exp-03-25": { - "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_200k_tokens": 0.0000025, - "litellm_provider": "vertex_ai-language-models", + "vertex_ai/gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, @@ -10215,12 +12109,14 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.00001, - "output_cost_per_token_above_200k_tokens": 0.000015, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": [ "/v1/chat/completions", - "/v1/completions" + "/v1/completions", + "/v1/batch" ], "supported_modalities": [ "text", @@ -10233,22 +12129,29 @@ ], "supports_audio_input": true, "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true, - "supports_web_search": true + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true }, - "gemini-2.5-pro-preview-03-25": { - "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 0.00000125, - "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_200k_tokens": 0.0000025, - "litellm_provider": "vertex_ai-language-models", + "vertex_ai/gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_token": 5e-7, + "input_cost_per_audio_token": 0.000001, + "litellm_provider": "vertex_ai", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, @@ -10259,9 +12162,8 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.00001, - "output_cost_per_token_above_200k_tokens": 0.000015, - "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "output_cost_per_token": 0.000003, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supported_endpoints": [ "/v1/chat/completions", "/v1/completions", @@ -10276,37 +12178,47 @@ "supported_output_modalities": [ "text" ], - "supports_audio_output": false, + "supports_audio_input": true, "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_video_input": true, "supports_vision": true, - "supports_web_search": true + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-7, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000054, + "cache_read_input_token_cost_priority": 9e-8, + "supports_service_tier": true }, - "gemini-2.5-pro-preview-05-06": { - "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 0.00000125, - "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_200k_tokens": 0.0000025, - "litellm_provider": "vertex_ai-language-models", + "vertex_ai/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 65535, + "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 65535, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.00001, - "output_cost_per_token_above_200k_tokens": 0.000015, - "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", "supported_endpoints": [ "/v1/chat/completions", "/v1/completions", @@ -10321,40 +12233,50 @@ "supported_output_modalities": [ "text" ], - "supported_regions": [ - "global" - ], - "supports_audio_output": false, + "supports_audio_input": true, "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_video_input": true, "supports_vision": true, - "supports_web_search": true - }, - "gemini-2.5-pro-preview-06-05": { - "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 0.00000125, - "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_200k_tokens": 0.0000025, - "litellm_provider": "vertex_ai-language-models", + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "vertex_ai/gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 65535, + "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 65535, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.00001, - "output_cost_per_token_above_200k_tokens": 0.000015, - "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", "supported_endpoints": [ "/v1/chat/completions", "/v1/completions", @@ -10369,21 +12291,30 @@ "supported_output_modalities": [ "text" ], - "supports_audio_output": false, + "supports_audio_input": true, "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_video_input": true, "supports_vision": true, - "supports_web_search": true + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true }, - "gemini-2.5-pro-preview-tts": { - "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 7e-7, + "gemini-2.5-pro-exp-03-25": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_token": 0.00000125, "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", @@ -10399,225 +12330,380 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, - "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], "supported_modalities": [ - "text" + "text", + "image", + "audio", + "video" ], "supported_output_modalities": [ - "audio" + "text" ], - "supports_audio_output": false, + "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_video_input": true, "supports_vision": true, "supports_web_search": true }, - "gemini-flash-experimental": { - "input_cost_per_character": 0, - "input_cost_per_token": 0, - "litellm_provider": "vertex_ai-language-models", - "max_input_tokens": 1000000, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_character": 0, - "output_cost_per_token": 0, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental", - "supports_function_calling": false, - "supports_parallel_function_calling": true, - "supports_tool_choice": true - }, - "gemini-pro": { - "input_cost_per_character": 1.25e-7, - "input_cost_per_image": 0.0025, - "input_cost_per_token": 5e-7, - "input_cost_per_video_per_second": 0.002, - "litellm_provider": "vertex_ai-language-models", - "max_input_tokens": 32760, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_character": 3.75e-7, - "output_cost_per_token": 0.0000015, - "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_tool_choice": true - }, - "gemini-pro-experimental": { - "input_cost_per_character": 0, - "input_cost_per_token": 0, + "gemini-2.5-pro-preview-03-25": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", - "max_input_tokens": 1000000, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_character": 0, - "output_cost_per_token": 0, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental", - "supports_function_calling": false, - "supports_parallel_function_calling": true, - "supports_tool_choice": true - }, - "gemini-pro-vision": { - "input_cost_per_image": 0.0025, - "input_cost_per_token": 5e-7, - "litellm_provider": "vertex_ai-vision-models", - "max_images_per_prompt": 16, - "max_input_tokens": 16384, - "max_output_tokens": 2048, - "max_tokens": 2048, - "max_video_length": 2, - "max_videos_per_prompt": 1, - "mode": "chat", - "output_cost_per_token": 0.0000015, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true - }, - "gemini/gemini-1.5-flash": { - "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1.5e-7, - "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 8192, + "max_output_tokens": 65535, "max_pdf_size_mb": 30, - "max_tokens": 8192, + "max_tokens": 65535, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 3e-7, - "output_cost_per_token_above_128k_tokens": 6e-7, - "rpm": 2000, - "source": "https://ai.google.dev/pricing", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true, - "tpm": 4000000 + "supports_web_search": true }, - "gemini/gemini-1.5-flash-001": { - "cache_creation_input_token_cost": 0.000001, - "cache_read_input_token_cost": 1.875e-8, - "deprecation_date": "2025-05-24", - "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1.5e-7, - "litellm_provider": "gemini", + "gemini-2.5-pro-preview-05-06": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 8192, + "max_output_tokens": 65535, "max_pdf_size_mb": 30, - "max_tokens": 8192, + "max_tokens": 65535, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 3e-7, - "output_cost_per_token_above_128k_tokens": 6e-7, - "rpm": 2000, - "source": "https://ai.google.dev/pricing", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supported_regions": [ + "global" + ], + "supports_audio_output": false, "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true, - "tpm": 4000000 + "supports_web_search": true }, - "gemini/gemini-1.5-flash-002": { - "cache_creation_input_token_cost": 0.000001, - "cache_read_input_token_cost": 1.875e-8, - "deprecation_date": "2025-09-24", - "input_cost_per_token": 7.5e-8, - "input_cost_per_token_above_128k_tokens": 1.5e-7, - "litellm_provider": "gemini", + "gemini-2.5-pro-preview-06-05": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 8192, + "max_output_tokens": 65535, "max_pdf_size_mb": 30, - "max_tokens": 8192, + "max_tokens": 65535, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 3e-7, - "output_cost_per_token_above_128k_tokens": 6e-7, - "rpm": 2000, - "source": "https://ai.google.dev/pricing", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true, - "tpm": 4000000 + "supports_web_search": true }, - "gemini/gemini-1.5-flash-8b": { - "input_cost_per_token": 0, - "input_cost_per_token_above_128k_tokens": 0, - "litellm_provider": "gemini", + "gemini-2.5-pro-preview-tts": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 8192, + "max_output_tokens": 65535, "max_pdf_size_mb": 30, - "max_tokens": 8192, + "max_tokens": 65535, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0, - "output_cost_per_token_above_128k_tokens": 0, - "rpm": 4000, - "source": "https://ai.google.dev/pricing", - "supports_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true, - "tpm": 4000000 + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true }, - "gemini/gemini-1.5-flash-8b-exp-0827": { - "input_cost_per_token": 0, - "input_cost_per_token_above_128k_tokens": 0, + "gemini-robotics-er-1.5-preview": { + "cache_read_input_token_cost": 0, + "input_cost_per_token": 3e-7, + "input_cost_per_audio_token": 0.000001, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "output_cost_per_reasoning_token": 0.0000025, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-robotics-er-1-5-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "video", + "audio" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true + }, + "gemini/gemini-robotics-er-1.5-preview": { + "cache_read_input_token_cost": 0, + "input_cost_per_token": 3e-7, + "input_cost_per_audio_token": 0.000001, "litellm_provider": "gemini", - "max_audio_length_hours": 8.4, - "max_audio_per_prompt": 1, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "output_cost_per_reasoning_token": 0.0000025, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-robotics-er-1-5-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "video", + "audio" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini-2.5-computer-use-preview-10-2025": { + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", "max_images_per_prompt": 3000, + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/computer-use", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_computer_use": true, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-flash-experimental": { + "input_cost_per_character": 0, + "input_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 1000000, "max_output_tokens": 8192, - "max_pdf_size_mb": 30, "max_tokens": 8192, - "max_video_length": 1, - "max_videos_per_prompt": 10, "mode": "chat", + "output_cost_per_character": 0, "output_cost_per_token": 0, - "output_cost_per_token_above_128k_tokens": 0, - "rpm": 4000, - "source": "https://ai.google.dev/pricing", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental", + "supports_function_calling": false, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-pro": { + "input_cost_per_character": 1.25e-7, + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "input_cost_per_video_per_second": 0.002, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 3.75e-7, + "output_cost_per_token": 0.0000015, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_function_calling": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true, - "tpm": 4000000 + "supports_parallel_function_calling": true, + "supports_tool_choice": true }, - "gemini/gemini-1.5-flash-8b-exp-0924": { + "gemini-pro-experimental": { + "input_cost_per_character": 0, "input_cost_per_token": 0, - "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_token": 0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental", + "supports_function_calling": false, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "gemini-pro-vision": { + "input_cost_per_image": 0.0025, + "input_cost_per_token": 5e-7, + "litellm_provider": "vertex_ai-vision-models", + "max_images_per_prompt": 16, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_tokens": 2048, + "max_video_length": 2, + "max_videos_per_prompt": 1, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/gemini-1.5-flash": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10629,21 +12715,23 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0, - "output_cost_per_token_above_128k_tokens": 0, - "rpm": 4000, + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, + "rpm": 2000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, - "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true, "tpm": 4000000 }, - "gemini/gemini-1.5-flash-exp-0827": { - "input_cost_per_token": 0, - "input_cost_per_token_above_128k_tokens": 0, + "gemini/gemini-1.5-flash-001": { + "cache_creation_input_token_cost": 0.000001, + "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2025-05-24", + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10655,18 +12743,22 @@ "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0, - "output_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, "rpm": 2000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, + "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true, "tpm": 4000000 }, - "gemini/gemini-1.5-flash-latest": { + "gemini/gemini-1.5-flash-002": { + "cache_creation_input_token_cost": 0.000001, + "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2025-09-24", "input_cost_per_token": 7.5e-8, "input_cost_per_token_above_128k_tokens": 1.5e-7, "litellm_provider": "gemini", @@ -10692,49 +12784,183 @@ "supports_vision": true, "tpm": 4000000 }, - "gemini/gemini-1.5-pro": { - "input_cost_per_token": 0.0000035, - "input_cost_per_token_above_128k_tokens": 0.000007, + "gemini/gemini-1.5-flash-8b": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, "litellm_provider": "gemini", - "max_input_tokens": 2097152, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, "max_output_tokens": 8192, + "max_pdf_size_mb": 30, "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.0000105, - "output_cost_per_token_above_128k_tokens": 0.000021, - "rpm": 1000, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 4000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, + "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true, "tpm": 4000000 }, - "gemini/gemini-1.5-pro-001": { - "deprecation_date": "2025-05-24", - "input_cost_per_token": 0.0000035, - "input_cost_per_token_above_128k_tokens": 0.000007, + "gemini/gemini-1.5-flash-8b-exp-0827": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, "litellm_provider": "gemini", - "max_input_tokens": 2097152, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1000000, "max_output_tokens": 8192, + "max_pdf_size_mb": 30, "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.0000105, - "output_cost_per_token_above_128k_tokens": 0.000021, - "rpm": 1000, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 4000, "source": "https://ai.google.dev/pricing", "supports_function_calling": true, - "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true, "tpm": 4000000 }, - "gemini/gemini-1.5-pro-002": { - "deprecation_date": "2025-09-24", - "input_cost_per_token": 0.0000035, + "gemini/gemini-1.5-flash-8b-exp-0924": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 4000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-exp-0827": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 2000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-flash-latest": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, + "rpm": 2000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "litellm_provider": "gemini", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro-001": { + "deprecation_date": "2025-05-24", + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "litellm_provider": "gemini", + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-1.5-pro-002": { + "deprecation_date": "2025-09-24", + "input_cost_per_token": 0.0000035, "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", "max_input_tokens": 2097152, @@ -10754,6 +12980,7 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro-exp-0801": { + "deprecation_date": "2025-09-29", "input_cost_per_token": 0.0000035, "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", @@ -10773,6 +13000,7 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro-exp-0827": { + "deprecation_date": "2025-09-29", "input_cost_per_token": 0, "input_cost_per_token_above_128k_tokens": 0, "litellm_provider": "gemini", @@ -10792,6 +13020,7 @@ "tpm": 4000000 }, "gemini/gemini-1.5-pro-latest": { + "deprecation_date": "2025-09-29", "input_cost_per_token": 0.0000035, "input_cost_per_token_above_128k_tokens": 0.000007, "litellm_provider": "gemini", @@ -10812,6 +13041,7 @@ }, "gemini/gemini-2.0-flash": { "cache_read_input_token_cost": 2.5e-8, + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, "litellm_provider": "gemini", @@ -10852,6 +13082,7 @@ }, "gemini/gemini-2.0-flash-001": { "cache_read_input_token_cost": 2.5e-8, + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, "litellm_provider": "gemini", @@ -10939,6 +13170,7 @@ }, "gemini/gemini-2.0-flash-lite": { "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7.5e-8, "input_cost_per_token": 7.5e-8, "litellm_provider": "gemini", @@ -10974,6 +13206,7 @@ "tpm": 4000000 }, "gemini/gemini-2.0-flash-lite-preview-02-05": { + "deprecation_date": "2025-12-09", "cache_read_input_token_cost": 1.875e-8, "input_cost_per_audio_token": 7.5e-8, "input_cost_per_token": 7.5e-8, @@ -11011,6 +13244,7 @@ "tpm": 10000000 }, "gemini/gemini-2.0-flash-live-001": { + "deprecation_date": "2025-12-09", "cache_read_input_token_cost": 7.5e-8, "input_cost_per_audio_token": 0.0000021, "input_cost_per_image": 0.0000021, @@ -11059,6 +13293,7 @@ "tpm": 250000 }, "gemini/gemini-2.0-flash-preview-image-generation": { + "deprecation_date": "2025-11-14", "cache_read_input_token_cost": 2.5e-8, "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, @@ -11098,6 +13333,7 @@ "tpm": 10000000 }, "gemini/gemini-2.0-flash-thinking-exp": { + "deprecation_date": "2025-12-02", "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, @@ -11116,7 +13352,7 @@ "max_input_tokens": 1048576, "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 8192, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", @@ -11147,6 +13383,7 @@ "tpm": 4000000 }, "gemini/gemini-2.0-flash-thinking-exp-01-21": { + "deprecation_date": "2025-12-02", "cache_read_input_token_cost": 0, "input_cost_per_audio_per_second": 0, "input_cost_per_audio_per_second_above_128k_tokens": 0, @@ -11165,7 +13402,7 @@ "max_input_tokens": 1048576, "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 8192, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", @@ -11285,8 +13522,8 @@ "tpm": 8000000 }, "gemini/gemini-2.5-flash-lite": { - "cache_read_input_token_cost": 2.5e-8, - "input_cost_per_audio_token": 5e-7, + "cache_read_input_token_cost": 1e-8, + "input_cost_per_audio_token": 3e-7, "input_cost_per_token": 1e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, @@ -11332,7 +13569,7 @@ "tpm": 250000 }, "gemini/gemini-2.5-flash-lite-preview-09-2025": { - "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost": 1e-8, "input_cost_per_audio_token": 3e-7, "input_cost_per_token": 1e-7, "litellm_provider": "gemini", @@ -11520,6 +13757,7 @@ "tpm": 250000 }, "gemini/gemini-2.5-flash-lite-preview-06-17": { + "deprecation_date": "2025-11-18", "cache_read_input_token_cost": 2.5e-8, "input_cost_per_audio_token": 5e-7, "input_cost_per_token": 1e-7, @@ -11611,6 +13849,7 @@ "tpm": 250000 }, "gemini/gemini-2.5-flash-preview-05-20": { + "deprecation_date": "2025-11-18", "cache_read_input_token_cost": 7.5e-8, "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, @@ -11655,50 +13894,13 @@ "supports_web_search": true, "tpm": 250000 }, - "gemini/gemini-2.5-flash-preview-tts": { - "cache_read_input_token_cost": 3.75e-8, - "input_cost_per_audio_token": 0.000001, - "input_cost_per_token": 1.5e-7, - "litellm_provider": "gemini", - "max_audio_length_hours": 8.4, - "max_audio_per_prompt": 1, - "max_images_per_prompt": 3000, - "max_input_tokens": 1048576, - "max_output_tokens": 65535, - "max_pdf_size_mb": 30, - "max_tokens": 65535, - "max_video_length": 1, - "max_videos_per_prompt": 10, - "mode": "chat", - "output_cost_per_reasoning_token": 0.0000035, - "output_cost_per_token": 6e-7, - "rpm": 10, - "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], - "supports_audio_output": false, - "supports_function_calling": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true, - "supports_web_search": true, - "tpm": 250000 - }, "gemini/gemini-2.5-pro": { - "cache_read_input_token_cost": 3.125e-7, + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_token": 0.00000125, "input_cost_per_token_above_200k_tokens": 0.0000025, + "input_cost_per_token_priority": 0.00000125, + "input_cost_per_token_above_200k_tokens_priority": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -11712,8 +13914,11 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, + "output_cost_per_token_priority": 0.00001, + "output_cost_per_token_above_200k_tokens_priority": 0.000015, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_service_tier": true, "supported_endpoints": [ "/v1/chat/completions", "/v1/completions" @@ -11772,6 +13977,7 @@ "tpm": 800000 }, "gemini/gemini-3-pro-preview": { + "deprecation_date": "2026-03-09", "cache_read_input_token_cost": 2e-7, "cache_read_input_token_cost_above_200k_tokens": 4e-7, "input_cost_per_token": 0.000002, @@ -11818,27 +14024,35 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "tpm": 800000 - }, - "gemini/gemini-3-flash-preview": { - "cache_read_input_token_cost": 5e-8, - "input_cost_per_audio_token": 0.000001, - "input_cost_per_token": 5e-7, + "tpm": 800000, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "gemini/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_per_audio_token": 5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 2.5e-7, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 65535, + "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 65535, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_reasoning_token": 0.000003, - "output_cost_per_token": 0.000003, - "rpm": 2000, - "source": "https://ai.google.dev/pricing/gemini-3", + "output_cost_per_reasoning_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models", "supported_endpoints": [ "/v1/chat/completions", "/v1/completions", @@ -11853,7 +14067,10 @@ "supported_output_modalities": [ "text" ], + "supports_audio_input": true, "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -11863,15 +14080,17 @@ "supports_system_messages": true, "supports_tool_choice": true, "supports_url_context": true, + "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "tpm": 800000 + "supports_native_streaming": true, + "tpm": 250000 }, - "gemini-3-flash-preview": { + "gemini/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-8, "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 5e-7, - "litellm_provider": "vertex_ai-language-models", + "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, @@ -11884,6 +14103,7 @@ "mode": "chat", "output_cost_per_reasoning_token": 0.000003, "output_cost_per_token": 0.000003, + "rpm": 2000, "source": "https://ai.google.dev/pricing/gemini-3", "supported_endpoints": [ "/v1/chat/completions", @@ -11910,30 +14130,41 @@ "supports_tool_choice": true, "supports_url_context": true, "supports_vision": true, - "supports_web_search": true - }, - "gemini/gemini-2.5-pro-exp-03-25": { - "cache_read_input_token_cost": 0, - "input_cost_per_token": 0, - "input_cost_per_token_above_200k_tokens": 0, + "supports_web_search": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 9e-7, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000054, + "cache_read_input_token_cost_priority": 9e-8, + "supports_service_tier": true + }, + "gemini/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 65535, + "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 65535, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0, - "output_cost_per_token_above_200k_tokens": 0, - "rpm": 5, - "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "rpm": 2000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-3.1-pro-preview", "supported_endpoints": [ "/v1/chat/completions", - "/v1/completions" + "/v1/completions", + "/v1/batch" ], "supported_modalities": [ "text", @@ -11948,40 +14179,205 @@ "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "tpm": 250000 - }, - "gemini/gemini-2.5-pro-preview-03-25": { - "cache_read_input_token_cost": 3.125e-7, - "input_cost_per_audio_token": 7e-7, - "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_200k_tokens": 0.0000025, + "supports_url_context": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "gemini/gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 65535, + "max_output_tokens": 65536, "max_pdf_size_mb": 30, - "max_tokens": 65535, + "max_tokens": 65536, "max_video_length": 1, "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.00001, - "output_cost_per_token_above_200k_tokens": 0.000015, - "rpm": 10000, - "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "rpm": 2000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-3.1-pro-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 5e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.000003, + "output_cost_per_token": 0.000003, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-7, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000054, + "cache_read_input_token_cost_priority": 9e-8, + "supports_service_tier": true + }, + "gemini/gemini-2.5-pro-exp-03-25": { + "cache_read_input_token_cost": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_200k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_200k_tokens": 0, + "rpm": 5, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini/gemini-2.5-pro-preview-03-25": { + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], "supported_output_modalities": [ "text" ], @@ -11997,7 +14393,9 @@ "tpm": 10000000 }, "gemini/gemini-2.5-pro-preview-05-06": { - "cache_read_input_token_cost": 3.125e-7, + "deprecation_date": "2025-12-02", + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 0.00000125, "input_cost_per_token_above_200k_tokens": 0.0000025, @@ -12038,7 +14436,8 @@ "tpm": 10000000 }, "gemini/gemini-2.5-pro-preview-06-05": { - "cache_read_input_token_cost": 3.125e-7, + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 0.00000125, "input_cost_per_token_above_200k_tokens": 0.0000025, @@ -12079,7 +14478,8 @@ "tpm": 10000000 }, "gemini/gemini-2.5-pro-preview-tts": { - "cache_read_input_token_cost": 3.125e-7, + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 0.00000125, "input_cost_per_token_above_200k_tokens": 0.0000025, @@ -12182,7 +14582,9 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "tpm": 250000, + "rpm": 10 }, "gemini/gemini-gemma-2-9b-it": { "input_cost_per_token": 3.5e-7, @@ -12194,7 +14596,9 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "tpm": 250000, + "rpm": 10 }, "gemini/gemini-pro": { "input_cost_per_token": 3.5e-7, @@ -12313,6 +14717,19 @@ "supports_parallel_function_calling": true, "supports_vision": true }, + "github_copilot/claude-opus-4.6-fast": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, "github_copilot/claude-opus-41": { "litellm_provider": "github_copilot", "max_input_tokens": 80000, @@ -12546,97 +14963,307 @@ "supports_response_schema": true, "supports_vision": true }, - "google.gemma-3-12b-it": { - "input_cost_per_token": 9e-8, - "litellm_provider": "bedrock_converse", + "gigachat/GigaChat-2-Lite": { + "input_cost_per_token": 0, + "litellm_provider": "gigachat", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.9e-7, - "supports_system_messages": true, - "supports_vision": true + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_system_messages": true }, - "google.gemma-3-27b-it": { - "input_cost_per_token": 2.3e-7, - "litellm_provider": "bedrock_converse", + "gigachat/GigaChat-2-Max": { + "input_cost_per_token": 0, + "litellm_provider": "gigachat", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3.8e-7, + "output_cost_per_token": 0, + "supports_function_calling": true, "supports_system_messages": true, "supports_vision": true }, - "google.gemma-3-4b-it": { - "input_cost_per_token": 4e-8, - "litellm_provider": "bedrock_converse", + "gigachat/GigaChat-2-Pro": { + "input_cost_per_token": 0, + "litellm_provider": "gigachat", "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 8e-8, + "output_cost_per_token": 0, + "supports_function_calling": true, "supports_system_messages": true, "supports_vision": true }, - "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 0.00000375, - "cache_read_input_token_cost": 3e-7, + "gmi/anthropic/claude-opus-4.5": { + "input_cost_per_token": 0.000005, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/anthropic/claude-sonnet-4.5": { "input_cost_per_token": 0.000003, - "input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "litellm_provider": "bedrock_converse", - "max_input_tokens": 200000, - "max_output_tokens": 64000, - "max_tokens": 64000, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, "mode": "chat", "output_cost_per_token": 0.000015, - "search_context_cost_per_query": { - "search_context_size_high": 0.01, - "search_context_size_low": 0.01, - "search_context_size_medium": 0.01 - }, - "supports_assistant_prefill": true, - "supports_computer_use": true, "supports_function_calling": true, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "supports_vision": true }, - "global.anthropic.claude-sonnet-4-20250514-v1:0": { - "cache_creation_input_token_cost": 0.00000375, - "cache_read_input_token_cost": 3e-7, + "gmi/anthropic/claude-sonnet-4": { "input_cost_per_token": 0.000003, - "input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "litellm_provider": "bedrock_converse", - "max_input_tokens": 1000000, - "max_output_tokens": 64000, - "max_tokens": 64000, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, "mode": "chat", "output_cost_per_token": 0.000015, - "search_context_cost_per_query": { - "search_context_size_high": 0.01, - "search_context_size_low": 0.01, - "search_context_size_medium": 0.01 - }, - "supports_assistant_prefill": true, - "supports_computer_use": true, "supports_function_calling": true, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_vision": true, + "supports_vision": true + }, + "gmi/anthropic/claude-opus-4": { + "input_cost_per_token": 0.000015, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000075, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/openai/gpt-5.2": { + "input_cost_per_token": 0.00000175, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000014, + "supports_function_calling": true + }, + "gmi/openai/gpt-5.1": { + "input_cost_per_token": 0.00000125, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true + }, + "gmi/openai/gpt-5": { + "input_cost_per_token": 0.00000125, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true + }, + "gmi/openai/gpt-4o": { + "input_cost_per_token": 0.0000025, + "litellm_provider": "gmi", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/openai/gpt-4o-mini": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "gmi", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/deepseek-ai/DeepSeek-V3.2": { + "input_cost_per_token": 2.8e-7, + "litellm_provider": "gmi", + "max_input_tokens": 163840, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_function_calling": true + }, + "gmi/deepseek-ai/DeepSeek-V3-0324": { + "input_cost_per_token": 2.8e-7, + "litellm_provider": "gmi", + "max_input_tokens": 163840, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 8.8e-7, + "supports_function_calling": true + }, + "gmi/google/gemini-3-pro-preview": { + "input_cost_per_token": 0.000002, + "litellm_provider": "gmi", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 0.000012, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/google/gemini-3-flash-preview": { + "input_cost_per_token": 5e-7, + "litellm_provider": "gmi", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/moonshotai/Kimi-K2-Thinking": { + "input_cost_per_token": 8e-7, + "litellm_provider": "gmi", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.0000012 + }, + "gmi/MiniMaxAI/MiniMax-M2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "gmi", + "max_input_tokens": 196608, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.0000012 + }, + "gmi/Qwen/Qwen3-VL-235B-A22B-Instruct-FP8": { + "input_cost_per_token": 3e-7, + "litellm_provider": "gmi", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.0000014, + "supports_vision": true + }, + "gmi/zai-org/GLM-4.7-FP8": { + "input_cost_per_token": 4e-7, + "litellm_provider": "gmi", + "max_input_tokens": 202752, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.000002 + }, + "google.gemma-3-12b-it": { + "input_cost_per_token": 9e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.9e-7, + "supports_system_messages": true, + "supports_vision": true + }, + "google.gemma-3-27b-it": { + "input_cost_per_token": 2.3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.8e-7, + "supports_system_messages": true, + "supports_vision": true + }, + "google.gemma-3-4b-it": { + "input_cost_per_token": 4e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-8, + "supports_system_messages": true, + "supports_vision": true + }, + "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "global.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, "global.anthropic.claude-haiku-4-5-20251001-v1:0": { @@ -12683,7 +15310,7 @@ "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, - "max_tokens": 4097, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.0000015, "supports_function_calling": true, @@ -12696,7 +15323,7 @@ "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, - "max_tokens": 16385, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.0000015, "supports_function_calling": true, @@ -12710,7 +15337,7 @@ "litellm_provider": "openai", "max_input_tokens": 4097, "max_output_tokens": 4096, - "max_tokens": 4097, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000002, "supports_prompt_caching": true, @@ -12722,7 +15349,7 @@ "litellm_provider": "openai", "max_input_tokens": 4097, "max_output_tokens": 4096, - "max_tokens": 4097, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000002, "supports_function_calling": true, @@ -12736,7 +15363,7 @@ "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, - "max_tokens": 16385, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000002, "supports_function_calling": true, @@ -12750,7 +15377,7 @@ "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, - "max_tokens": 16385, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000004, "supports_prompt_caching": true, @@ -12762,7 +15389,7 @@ "litellm_provider": "openai", "max_input_tokens": 16385, "max_output_tokens": 4096, - "max_tokens": 16385, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000004, "supports_prompt_caching": true, @@ -13292,14 +15919,14 @@ "supports_vision": true }, "gpt-4o-audio-preview": { - "input_cost_per_audio_token": 0.0001, + "input_cost_per_audio_token": 0.00004, "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 0.0002, + "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, @@ -13309,14 +15936,14 @@ "supports_tool_choice": true }, "gpt-4o-audio-preview-2024-10-01": { - "input_cost_per_audio_token": 0.0001, + "input_cost_per_audio_token": 0.00004, "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_audio_token": 0.0002, + "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, "supports_audio_input": true, "supports_audio_output": true, @@ -13359,31 +15986,244 @@ "supports_system_messages": true, "supports_tool_choice": true }, - "gpt-4o-mini": { - "cache_read_input_token_cost": 7.5e-8, - "cache_read_input_token_cost_priority": 1.25e-7, - "input_cost_per_token": 1.5e-7, - "input_cost_per_token_batches": 7.5e-8, - "input_cost_per_token_priority": 2.5e-7, + "gpt-audio": { + "input_cost_per_audio_token": 0.000032, + "input_cost_per_token": 0.0000025, "litellm_provider": "openai", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 6e-7, - "output_cost_per_token_batches": 3e-7, - "output_cost_per_token_priority": 0.000001, + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, "supports_function_calling": true, + "supports_native_streaming": true, "supports_parallel_function_calling": true, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_response_schema": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, "supports_system_messages": true, "supports_tool_choice": true, - "supports_service_tier": true, - "supports_vision": true + "supports_vision": false }, - "gpt-4o-mini-2024-07-18": { + "gpt-audio-1.5": { + "input_cost_per_audio_token": 0.000032, + "input_cost_per_token": 0.0000025, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-2025-08-28": { + "input_cost_per_audio_token": 0.000032, + "input_cost_per_token": 0.0000025, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.00001, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-mini": { + "input_cost_per_audio_token": 0.00001, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-mini-2025-10-06": { + "input_cost_per_audio_token": 0.00001, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-mini-2025-12-15": { + "input_cost_per_audio_token": 0.00001, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-4o-mini": { + "cache_read_input_token_cost": 7.5e-8, + "cache_read_input_token_cost_priority": 1.25e-7, + "input_cost_per_token": 1.5e-7, + "input_cost_per_token_batches": 7.5e-8, + "input_cost_per_token_priority": 2.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-7, + "output_cost_per_token_batches": 3e-7, + "output_cost_per_token_priority": 0.000001, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-mini-2024-07-18": { "cache_read_input_token_cost": 7.5e-8, "input_cost_per_token": 1.5e-7, "input_cost_per_token_batches": 7.5e-8, @@ -13798,7 +16638,7 @@ "input_cost_per_token": 0.00000175, "input_cost_per_token_priority": 0.0000035, "litellm_provider": "openai", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -13835,7 +16675,7 @@ "input_cost_per_token": 0.00000175, "input_cost_per_token_priority": 0.0000035, "litellm_provider": "openai", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -13900,6 +16740,40 @@ "supports_tool_choice": true, "supports_vision": true }, + "gpt-5.3-chat-latest": { + "cache_read_input_token_cost": 1.75e-7, + "cache_read_input_token_cost_priority": 3.5e-7, + "input_cost_per_token": 0.00000175, + "input_cost_per_token_priority": 0.0000035, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.000014, + "output_cost_per_token_priority": 0.000028, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, "gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_flex": 6.25e-8, @@ -13943,9 +16817,9 @@ "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, "litellm_provider": "openai", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, "supported_endpoints": [ @@ -14184,18 +17058,19 @@ "supports_system_messages": true, "supports_tool_choice": true }, - "gpt-realtime-mini": { - "cache_creation_input_audio_token_cost": 3e-7, - "cache_read_input_audio_token_cost": 3e-7, - "input_cost_per_audio_token": 0.00001, - "input_cost_per_token": 6e-7, + "gpt-realtime-1.5": { + "cache_creation_input_audio_token_cost": 4e-7, + "cache_read_input_token_cost": 4e-7, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, "litellm_provider": "openai", - "max_input_tokens": 128000, + "max_input_tokens": 32000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_audio_token": 0.00002, - "output_cost_per_token": 0.0000024, + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.000016, "supported_endpoints": [ "/v1/realtime" ], @@ -14215,10 +17090,41 @@ "supports_system_messages": true, "supports_tool_choice": true }, - "gpt-realtime-2025-08-28": { - "cache_creation_input_audio_token_cost": 4e-7, - "cache_read_input_token_cost": 4e-7, - "input_cost_per_audio_token": 0.000032, + "gpt-realtime-mini": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_audio_token_cost": 3e-7, + "input_cost_per_audio_token": 0.00001, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-2025-08-28": { + "cache_creation_input_audio_token_cost": 4e-7, + "cache_read_input_token_cost": 4e-7, + "input_cost_per_audio_token": 0.000032, "input_cost_per_image": 0.000005, "input_cost_per_token": 0.000004, "litellm_provider": "openai", @@ -14426,7 +17332,7 @@ "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { "input_cost_per_token": 0, "litellm_provider": "lemonade", - "max_tokens": 262144, + "max_tokens": 32768, "max_input_tokens": 262144, "max_output_tokens": 32768, "mode": "chat", @@ -14438,7 +17344,7 @@ "lemonade/gpt-oss-20b-mxfp4-GGUF": { "input_cost_per_token": 0, "litellm_provider": "lemonade", - "max_tokens": 131072, + "max_tokens": 32768, "max_input_tokens": 131072, "max_output_tokens": 32768, "mode": "chat", @@ -14450,7 +17356,7 @@ "lemonade/gpt-oss-120b-mxfp-GGUF": { "input_cost_per_token": 0, "litellm_provider": "lemonade", - "max_tokens": 131072, + "max_tokens": 32768, "max_input_tokens": 131072, "max_output_tokens": 32768, "mode": "chat", @@ -14462,7 +17368,7 @@ "lemonade/Gemma-3-4b-it-GGUF": { "input_cost_per_token": 0, "litellm_provider": "lemonade", - "max_tokens": 128000, + "max_tokens": 8192, "max_input_tokens": 128000, "max_output_tokens": 8192, "mode": "chat", @@ -14474,7 +17380,7 @@ "lemonade/Qwen3-4B-Instruct-2507-GGUF": { "input_cost_per_token": 0, "litellm_provider": "lemonade", - "max_tokens": 262144, + "max_tokens": 32768, "max_input_tokens": 262144, "max_output_tokens": 32768, "mode": "chat", @@ -14537,69 +17443,6 @@ "supports_response_schema": true, "supports_vision": true }, - "groq/deepseek-r1-distill-llama-70b": { - "input_cost_per_token": 7.5e-7, - "litellm_provider": "groq", - "max_input_tokens": 128000, - "max_output_tokens": 128000, - "max_tokens": 128000, - "mode": "chat", - "output_cost_per_token": 9.9e-7, - "supports_function_calling": true, - "supports_reasoning": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/gemma-7b-it": { - "deprecation_date": "2024-12-18", - "input_cost_per_token": 7e-8, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 7e-8, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/gemma2-9b-it": { - "input_cost_per_token": 2e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 2e-7, - "supports_function_calling": false, - "supports_response_schema": false, - "supports_tool_choice": false - }, - "groq/llama-3.1-405b-reasoning": { - "input_cost_per_token": 5.9e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 7.9e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/llama-3.1-70b-versatile": { - "deprecation_date": "2025-01-24", - "input_cost_per_token": 5.9e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 7.9e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, "groq/llama-3.1-8b-instant": { "input_cost_per_token": 5e-8, "litellm_provider": "groq", @@ -14612,97 +17455,6 @@ "supports_response_schema": false, "supports_tool_choice": true }, - "groq/llama-3.2-11b-text-preview": { - "deprecation_date": "2024-10-28", - "input_cost_per_token": 1.8e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 1.8e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/llama-3.2-11b-vision-preview": { - "deprecation_date": "2025-04-14", - "input_cost_per_token": 1.8e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 1.8e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true, - "supports_vision": true - }, - "groq/llama-3.2-1b-preview": { - "deprecation_date": "2025-04-14", - "input_cost_per_token": 4e-8, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 4e-8, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/llama-3.2-3b-preview": { - "deprecation_date": "2025-04-14", - "input_cost_per_token": 6e-8, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 6e-8, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/llama-3.2-90b-text-preview": { - "deprecation_date": "2024-11-25", - "input_cost_per_token": 9e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 9e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/llama-3.2-90b-vision-preview": { - "deprecation_date": "2025-04-14", - "input_cost_per_token": 9e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 9e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true, - "supports_vision": true - }, - "groq/llama-3.3-70b-specdec": { - "deprecation_date": "2025-04-14", - "input_cost_per_token": 5.9e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 9.9e-7, - "supports_tool_choice": true - }, "groq/llama-3.3-70b-versatile": { "input_cost_per_token": 5.9e-7, "litellm_provider": "groq", @@ -14715,52 +17467,26 @@ "supports_response_schema": false, "supports_tool_choice": true }, - "groq/llama-guard-3-8b": { - "input_cost_per_token": 2e-7, - "litellm_provider": "groq", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 2e-7 - }, - "groq/llama2-70b-4096": { - "input_cost_per_token": 7e-7, - "litellm_provider": "groq", - "max_input_tokens": 4096, - "max_output_tokens": 4096, - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 8e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/llama3-groq-70b-8192-tool-use-preview": { - "deprecation_date": "2025-01-06", - "input_cost_per_token": 8.9e-7, + "groq/gemma-7b-it": { + "input_cost_per_token": 5e-8, "litellm_provider": "groq", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 8.9e-7, + "output_cost_per_token": 8e-8, "supports_function_calling": true, "supports_response_schema": false, "supports_tool_choice": true }, - "groq/llama3-groq-8b-8192-tool-use-preview": { - "deprecation_date": "2025-01-06", - "input_cost_per_token": 1.9e-7, + "groq/meta-llama/llama-guard-4-12b": { + "input_cost_per_token": 2e-7, "litellm_provider": "groq", "max_input_tokens": 8192, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.9e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true + "output_cost_per_token": 2e-7 }, "groq/meta-llama/llama-4-maverick-17b-128e-instruct": { "input_cost_per_token": 2e-7, @@ -14772,7 +17498,8 @@ "output_cost_per_token": 6e-7, "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, "groq/meta-llama/llama-4-scout-17b-16e-instruct": { "input_cost_per_token": 1.1e-7, @@ -14784,41 +17511,8 @@ "output_cost_per_token": 3.4e-7, "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true - }, - "groq/mistral-saba-24b": { - "input_cost_per_token": 7.9e-7, - "litellm_provider": "groq", - "max_input_tokens": 32000, - "max_output_tokens": 32000, - "max_tokens": 32000, - "mode": "chat", - "output_cost_per_token": 7.9e-7 - }, - "groq/mixtral-8x7b-32768": { - "deprecation_date": "2025-03-20", - "input_cost_per_token": 2.4e-7, - "litellm_provider": "groq", - "max_input_tokens": 32768, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 2.4e-7, - "supports_function_calling": true, - "supports_response_schema": false, - "supports_tool_choice": true - }, - "groq/moonshotai/kimi-k2-instruct": { - "input_cost_per_token": 0.000001, - "litellm_provider": "groq", - "max_input_tokens": 131072, - "max_output_tokens": 16384, - "max_tokens": 131072, - "mode": "chat", - "output_cost_per_token": 0.000003, - "supports_function_calling": true, - "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, "groq/moonshotai/kimi-k2-instruct-0905": { "input_cost_per_token": 0.000001, @@ -14827,20 +17521,21 @@ "litellm_provider": "groq", "max_input_tokens": 262144, "max_output_tokens": 16384, - "max_tokens": 278528, + "max_tokens": 16384, "mode": "chat", "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true }, "groq/openai/gpt-oss-120b": { + "cache_read_input_token_cost": 7.5e-8, "input_cost_per_token": 1.5e-7, "litellm_provider": "groq", "max_input_tokens": 131072, "max_output_tokens": 32766, "max_tokens": 32766, "mode": "chat", - "output_cost_per_token": 7.5e-7, + "output_cost_per_token": 6e-7, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_reasoning": true, @@ -14849,13 +17544,14 @@ "supports_web_search": true }, "groq/openai/gpt-oss-20b": { - "input_cost_per_token": 1e-7, + "cache_read_input_token_cost": 3.75e-8, + "input_cost_per_token": 7.5e-8, "litellm_provider": "groq", "max_input_tokens": 131072, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 5e-7, + "output_cost_per_token": 3e-7, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_reasoning": true, @@ -14863,8 +17559,23 @@ "supports_tool_choice": true, "supports_web_search": true }, - "groq/qwen/qwen3-32b": { - "input_cost_per_token": 2.9e-7, + "groq/openai/gpt-oss-safeguard-20b": { + "cache_read_input_token_cost": 3.7e-8, + "input_cost_per_token": 7.5e-8, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "groq/qwen/qwen3-32b": { + "input_cost_per_token": 2.9e-7, "litellm_provider": "groq", "max_input_tokens": 131000, "max_output_tokens": 131000, @@ -15383,7 +18094,7 @@ "litellm_provider": "lambda_ai", "max_input_tokens": 131072, "max_output_tokens": 8192, - "max_tokens": 131072, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 1e-7, "supports_function_calling": true, @@ -15396,7 +18107,7 @@ "litellm_provider": "lambda_ai", "max_input_tokens": 16384, "max_output_tokens": 8192, - "max_tokens": 16384, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 1e-7, "supports_function_calling": true, @@ -15589,7 +18300,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000016, "supports_function_calling": true, @@ -15600,7 +18311,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 2048, - "max_tokens": 128000, + "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 9.9e-7, "supports_function_calling": true, @@ -15611,7 +18322,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 2048, - "max_tokens": 128000, + "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 2.2e-7, "supports_function_calling": true, @@ -15622,7 +18333,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 3.5e-7, "supports_function_calling": true, @@ -15634,7 +18345,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 1e-7, "supports_function_calling": true, @@ -15645,7 +18356,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 1.5e-7, "supports_function_calling": true, @@ -15656,7 +18367,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000002, "supports_function_calling": true, @@ -15738,7 +18449,7 @@ "litellm_provider": "meta_llama", "max_input_tokens": 128000, "max_output_tokens": 4028, - "max_tokens": 128000, + "max_tokens": 4028, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", "supported_modalities": [ @@ -15754,7 +18465,7 @@ "litellm_provider": "meta_llama", "max_input_tokens": 128000, "max_output_tokens": 4028, - "max_tokens": 128000, + "max_tokens": 4028, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", "supported_modalities": [ @@ -15770,7 +18481,7 @@ "litellm_provider": "meta_llama", "max_input_tokens": 1000000, "max_output_tokens": 4028, - "max_tokens": 128000, + "max_tokens": 4028, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", "supported_modalities": [ @@ -15787,7 +18498,7 @@ "litellm_provider": "meta_llama", "max_input_tokens": 10000000, "max_output_tokens": 4028, - "max_tokens": 128000, + "max_tokens": 4028, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", "supported_modalities": [ @@ -15810,6 +18521,94 @@ "output_cost_per_token": 0.0000012, "supports_system_messages": true }, + "minimax.minimax-m2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "minimax/MiniMax-M2.1": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000012, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.1-lightning": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000024, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.5": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000012, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.5-lightning": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000024, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000012, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 200000, + "max_output_tokens": 8192 + }, "mistral.magistral-small-2509": { "input_cost_per_token": 5e-7, "litellm_provider": "bedrock_converse", @@ -16031,6 +18830,20 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/devstral-small-latest": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://docs.mistral.ai/models/devstral-small-2-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/labs-devstral-small-2512": { "input_cost_per_token": 1e-7, "litellm_provider": "mistral", @@ -16045,6 +18858,34 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/devstral-latest": { + "input_cost_per_token": 4e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 0.000002, + "source": "https://mistral.ai/news/devstral-2-vibe-cli", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-medium-latest": { + "input_cost_per_token": 4e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 0.000002, + "source": "https://mistral.ai/news/devstral-2-vibe-cli", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/devstral-2512": { "input_cost_per_token": 4e-7, "litellm_provider": "mistral", @@ -16089,6 +18930,21 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/magistral-medium-1-2-2509": { + "input_cost_per_token": 0.000002, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/magistral-medium-latest": { "input_cost_per_token": 0.000002, "litellm_provider": "mistral", @@ -16134,6 +18990,21 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/magistral-small-1-2-2509": { + "input_cost_per_token": 5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://mistral.ai/pricing#api-pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/mistral-large-2402": { "input_cost_per_token": 0.000004, "litellm_provider": "mistral", @@ -16174,24 +19045,41 @@ "supports_tool_choice": true }, "mistral/mistral-large-latest": { - "input_cost_per_token": 0.000002, + "input_cost_per_token": 5e-7, "litellm_provider": "mistral", - "max_input_tokens": 128000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 0.000006, + "output_cost_per_token": 0.0000015, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, "mistral/mistral-large-3": { "input_cost_per_token": 5e-7, "litellm_provider": "mistral", - "max_input_tokens": 256000, - "max_output_tokens": 8191, - "max_tokens": 8191, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-large-2512": { + "input_cost_per_token": 5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", "output_cost_per_token": 0.0000015, "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", @@ -16242,29 +19130,32 @@ "input_cost_per_token": 4e-7, "litellm_provider": "mistral", "max_input_tokens": 131072, - "max_output_tokens": 8191, - "max_tokens": 8191, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", "output_cost_per_token": 0.000002, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, - "mistral/mistral-small": { - "input_cost_per_token": 1e-7, + "mistral/mistral-medium-3-1-2508": { + "input_cost_per_token": 4e-7, "litellm_provider": "mistral", - "max_input_tokens": 32000, - "max_output_tokens": 8191, - "max_tokens": 8191, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 3e-7, + "output_cost_per_token": 0.000002, + "source": "https://mistral.ai/news/mistral-medium-3", "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, - "mistral/mistral-small-latest": { + "mistral/mistral-small": { "input_cost_per_token": 1e-7, "litellm_provider": "mistral", "max_input_tokens": 32000, @@ -16277,30 +19168,105 @@ "supports_response_schema": true, "supports_tool_choice": true }, - "mistral/mistral-tiny": { - "input_cost_per_token": 2.5e-7, + "mistral/mistral-small-latest": { + "input_cost_per_token": 6e-8, "litellm_provider": "mistral", - "max_input_tokens": 32000, - "max_output_tokens": 8191, - "max_tokens": 8191, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-7, + "output_cost_per_token": 1.8e-7, + "source": "https://mistral.ai/pricing", "supports_assistant_prefill": true, + "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, - "mistral/open-codestral-mamba": { - "input_cost_per_token": 2.5e-7, + "mistral/mistral-small-3-2-2506": { + "input_cost_per_token": 6e-8, "litellm_provider": "mistral", - "max_input_tokens": 256000, - "max_output_tokens": 256000, - "max_tokens": 256000, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 2.5e-7, - "source": "https://mistral.ai/technology/", + "output_cost_per_token": 1.8e-7, + "source": "https://mistral.ai/pricing", "supports_assistant_prefill": true, - "supports_tool_choice": true - }, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-3b-2512": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-7, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-8b-2512": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-14b-2512": { + "input_cost_per_token": 2e-7, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2e-7, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-tiny": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-codestral-mamba": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2.5e-7, + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, "mistral/open-mistral-7b": { "input_cost_per_token": 2.5e-7, "litellm_provider": "mistral", @@ -16418,6 +19384,20 @@ "supports_reasoning": true, "supports_system_messages": true }, + "moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "moonshot/kimi-k2-0711-preview": { "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 6e-7, @@ -16460,6 +19440,21 @@ "supports_tool_choice": true, "supports_web_search": true }, + "moonshot/kimi-k2.5": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, "moonshot/kimi-latest": { "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 0.000002, @@ -16832,605 +19827,920 @@ "output_cost_per_token": 6e-7, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" }, - "nvidia.nemotron-nano-12b-v2": { - "input_cost_per_token": 2e-7, - "litellm_provider": "bedrock_converse", + "nebius/deepseek-ai/DeepSeek-R1": { + "max_tokens": 128000, "max_input_tokens": 128000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 128000, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.0000024, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 6e-7, - "supports_system_messages": true, - "supports_vision": true + "supports_function_calling": true, + "supports_reasoning": true, + "source": "https://nebius.com/prices-ai-studio" }, - "nvidia.nemotron-nano-9b-v2": { - "input_cost_per_token": 6e-8, - "litellm_provider": "bedrock_converse", + "nebius/deepseek-ai/DeepSeek-R1-0528": { + "max_tokens": 164000, + "max_input_tokens": 164000, + "max_output_tokens": 164000, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.0000024, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "max_tokens": 128000, "max_input_tokens": 128000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 128000, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 7.5e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 2.3e-7, - "supports_system_messages": true + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" }, - "o1": { - "cache_read_input_token_cost": 0.0000075, - "input_cost_per_token": 0.000015, - "litellm_provider": "openai", - "max_input_tokens": 200000, - "max_output_tokens": 100000, - "max_tokens": 100000, + "nebius/deepseek-ai/DeepSeek-V3": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.00006, "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true + "source": "https://nebius.com/prices-ai-studio" }, - "o1-2024-12-17": { - "cache_read_input_token_cost": 0.0000075, - "input_cost_per_token": 0.000015, - "litellm_provider": "openai", - "max_input_tokens": 200000, - "max_output_tokens": 100000, - "max_tokens": 100000, + "nebius/deepseek-ai/DeepSeek-V3-0324": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.00006, "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true + "source": "https://nebius.com/prices-ai-studio" }, - "o1-mini": { - "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 0.0000011, - "litellm_provider": "openai", + "nebius/google/gemma-3-27b-it": { + "max_tokens": 128000, "max_input_tokens": 128000, - "max_output_tokens": 65536, - "max_tokens": 65536, + "max_output_tokens": 128000, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.0000044, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_vision": true + "supports_function_calling": true, + "supports_vision": true, + "source": "https://nebius.com/prices-ai-studio" }, - "o1-mini-2024-09-12": { - "deprecation_date": "2025-10-27", - "cache_read_input_token_cost": 0.0000015, - "input_cost_per_token": 0.000003, - "litellm_provider": "openai", + "nebius/meta-llama/Llama-3.3-70B-Instruct": { + "max_tokens": 128000, "max_input_tokens": 128000, - "max_output_tokens": 65536, - "max_tokens": 65536, + "max_output_tokens": 128000, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.000012, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_vision": true + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" }, - "o1-preview": { - "cache_read_input_token_cost": 0.0000075, - "input_cost_per_token": 0.000015, - "litellm_provider": "openai", + "nebius/meta-llama/Llama-Guard-3-8B": { + "max_tokens": 128000, "max_input_tokens": 128000, - "max_output_tokens": 32768, - "max_tokens": 32768, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 6e-8, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.00006, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_vision": true + "source": "https://nebius.com/prices-ai-studio" }, - "o1-preview-2024-09-12": { - "cache_read_input_token_cost": 0.0000075, - "input_cost_per_token": 0.000015, - "litellm_provider": "openai", + "nebius/meta-llama/Meta-Llama-3.1-8B-Instruct": { + "max_tokens": 128000, "max_input_tokens": 128000, - "max_output_tokens": 32768, - "max_tokens": 32768, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 6e-8, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.00006, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_vision": true + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" }, - "o3": { - "cache_read_input_token_cost": 5e-7, - "cache_read_input_token_cost_flex": 2.5e-7, - "cache_read_input_token_cost_priority": 8.75e-7, - "input_cost_per_token": 0.000002, - "input_cost_per_token_flex": 0.000001, - "input_cost_per_token_priority": 0.0000035, - "litellm_provider": "openai", - "max_input_tokens": 200000, - "max_output_tokens": 100000, - "max_tokens": 100000, + "nebius/meta-llama/Meta-Llama-3.1-70B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.000008, - "output_cost_per_token_flex": 0.000004, - "output_cost_per_token_priority": 0.000014, - "supported_endpoints": [ - "/v1/responses", - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], "supports_function_calling": true, - "supports_parallel_function_calling": false, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_service_tier": true, - "supports_vision": true + "source": "https://nebius.com/prices-ai-studio" }, - "o3-2025-04-16": { - "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 0.000002, - "litellm_provider": "openai", - "max_input_tokens": 200000, - "max_output_tokens": 100000, - "max_tokens": 100000, + "nebius/meta-llama/Meta-Llama-3.1-405B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.000008, - "supported_endpoints": [ - "/v1/responses", - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], "supports_function_calling": true, - "supports_parallel_function_calling": false, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_service_tier": true, - "supports_vision": true + "source": "https://nebius.com/prices-ai-studio" }, - "o3-mini": { - "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 0.0000011, - "litellm_provider": "openai", - "max_input_tokens": 200000, - "max_output_tokens": 100000, - "max_tokens": 100000, + "nebius/mistralai/Mistral-Nemo-Instruct-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.2e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.0000044, "supports_function_calling": true, - "supports_parallel_function_calling": false, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_vision": false + "source": "https://nebius.com/prices-ai-studio" }, - "o3-mini-2025-01-31": { - "cache_read_input_token_cost": 5.5e-7, - "input_cost_per_token": 0.0000011, - "litellm_provider": "openai", - "max_input_tokens": 200000, - "max_output_tokens": 100000, - "max_tokens": 100000, + "nebius/NousResearch/Hermes-3-Llama-3.1-405B": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.0000044, "supports_function_calling": true, - "supports_parallel_function_calling": false, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_vision": false + "source": "https://nebius.com/prices-ai-studio" }, - "o4-mini": { - "cache_read_input_token_cost": 2.75e-7, - "cache_read_input_token_cost_flex": 1.375e-7, - "cache_read_input_token_cost_priority": 5e-7, - "input_cost_per_token": 0.0000011, - "input_cost_per_token_flex": 5.5e-7, - "input_cost_per_token_priority": 0.000002, - "litellm_provider": "openai", - "max_input_tokens": 200000, - "max_output_tokens": 100000, - "max_tokens": 100000, + "nebius/nvidia/Llama-3.1-Nemotron-Ultra-253B-v1": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000018, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.0000044, - "output_cost_per_token_flex": 0.0000022, - "output_cost_per_token_priority": 0.000008, "supports_function_calling": true, - "supports_parallel_function_calling": false, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_service_tier": true, - "supports_vision": true + "source": "https://nebius.com/prices-ai-studio" }, - "o4-mini-2025-04-16": { - "cache_read_input_token_cost": 2.75e-7, - "input_cost_per_token": 0.0000011, - "litellm_provider": "openai", - "max_input_tokens": 200000, - "max_output_tokens": 100000, - "max_tokens": 100000, + "nebius/nvidia/Llama-3.3-Nemotron-Super-49B-v1": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.0000044, "supports_function_calling": true, - "supports_parallel_function_calling": false, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supports_service_tier": true, - "supports_vision": true + "source": "https://nebius.com/prices-ai-studio" }, - "oci/meta.llama-3.1-405b-instruct": { - "input_cost_per_token": 0.00001068, - "litellm_provider": "oci", - "max_input_tokens": 128000, - "max_output_tokens": 4000, - "max_tokens": 128000, + "nebius/Qwen/Qwen3-235B-A22B": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.00001068, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "source": "https://nebius.com/prices-ai-studio" }, - "oci/meta.llama-3.2-90b-vision-instruct": { - "input_cost_per_token": 0.000002, - "litellm_provider": "oci", - "max_input_tokens": 128000, - "max_output_tokens": 4000, - "max_tokens": 128000, + "nebius/Qwen/Qwen3-32B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.000002, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "source": "https://nebius.com/prices-ai-studio" }, - "oci/meta.llama-3.3-70b-instruct": { - "input_cost_per_token": 7.2e-7, - "litellm_provider": "oci", - "max_input_tokens": 128000, - "max_output_tokens": 4000, + "nebius/Qwen/Qwen3-30B-A3B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen3-14B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 2.4e-7, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen3-4B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 2.4e-7, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/QwQ-32B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 4.5e-7, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen2.5-72B-Instruct": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 7.2e-7, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "source": "https://nebius.com/prices-ai-studio" }, - "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": { - "input_cost_per_token": 7.2e-7, - "litellm_provider": "oci", - "max_input_tokens": 512000, - "max_output_tokens": 4000, - "max_tokens": 512000, + "nebius/Qwen/Qwen2.5-32B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 7.2e-7, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "source": "https://nebius.com/prices-ai-studio" }, - "oci/meta.llama-4-scout-17b-16e-instruct": { - "input_cost_per_token": 7.2e-7, - "litellm_provider": "oci", - "max_input_tokens": 192000, - "max_output_tokens": 4000, - "max_tokens": 192000, + "nebius/Qwen/Qwen2.5-Coder-7B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-8, + "output_cost_per_token": 3e-8, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 7.2e-7, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "source": "https://nebius.com/prices-ai-studio" }, - "oci/xai.grok-3": { - "input_cost_per_token": 0.000003, - "litellm_provider": "oci", + "nebius/Qwen/Qwen2.5-VL-72B-Instruct": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 1.5e-7, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_vision": true, + "source": "https://nebius.com/prices-ai-studio" }, - "oci/xai.grok-3-fast": { - "input_cost_per_token": 0.000005, - "litellm_provider": "oci", + "nebius/Qwen/Qwen2-VL-72B-Instruct": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.000025, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_vision": true, + "source": "https://nebius.com/prices-ai-studio" }, - "oci/xai.grok-3-mini": { - "input_cost_per_token": 3e-7, - "litellm_provider": "oci", - "max_input_tokens": 131072, - "max_output_tokens": 131072, + "nebius/Qwen/Qwen2-VL-7B-Instruct": { "max_tokens": 131072, - "mode": "chat", - "output_cost_per_token": 5e-7, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", - "supports_function_calling": true, - "supports_response_schema": false - }, - "oci/xai.grok-3-mini-fast": { - "input_cost_per_token": 6e-7, - "litellm_provider": "oci", "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 6e-8, + "litellm_provider": "nebius", "mode": "chat", - "output_cost_per_token": 0.000004, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", - "supports_function_calling": true, - "supports_response_schema": false + "supports_vision": true, + "source": "https://nebius.com/prices-ai-studio" }, - "oci/xai.grok-4": { - "input_cost_per_token": 0.000003, - "litellm_provider": "oci", + "nvidia.nemotron-nano-12b-v2": { + "input_cost_per_token": 2e-7, + "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-7, - "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", - "supports_function_calling": true, - "supports_response_schema": false + "output_cost_per_token": 6e-7, + "supports_system_messages": true, + "supports_vision": true }, - "oci/cohere.command-latest": { - "input_cost_per_token": 0.00000156, - "litellm_provider": "oci", + "nvidia.nemotron-nano-9b-v2": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, - "max_output_tokens": 4000, - "max_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.00000156, - "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "output_cost_per_token": 2.3e-7, + "supports_system_messages": true + }, + "nvidia.nemotron-nano-3-30b": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.4e-7, "supports_function_calling": true, - "supports_response_schema": false + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" }, - "oci/cohere.command-a-03-2025": { - "input_cost_per_token": 0.00000156, - "litellm_provider": "oci", - "max_input_tokens": 256000, - "max_output_tokens": 4000, - "max_tokens": 256000, + "o1": { + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.00000156, - "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "output_cost_per_token": 0.00006, "supports_function_calling": true, - "supports_response_schema": false + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true }, - "oci/cohere.command-plus-latest": { - "input_cost_per_token": 0.00000156, - "litellm_provider": "oci", - "max_input_tokens": 128000, - "max_output_tokens": 4000, - "max_tokens": 128000, + "o1-2024-12-17": { + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.00000156, - "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "output_cost_per_token": 0.00006, "supports_function_calling": true, - "supports_response_schema": false + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true }, - "ollama/codegeex4": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 32768, - "max_output_tokens": 8192, - "max_tokens": 32768, + "o1-mini": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": false + "output_cost_per_token": 0.0000044, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_vision": true }, - "ollama/deepseek-coder-v2-instruct": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 32768, - "max_output_tokens": 8192, - "max_tokens": 32768, + "o1-mini-2024-09-12": { + "deprecation_date": "2025-10-27", + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000003, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 0.000012, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": true }, - "ollama/deepseek-coder-v2-lite-instruct": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 32768, - "max_output_tokens": 8192, + "o1-preview": { + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 0.00006, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": true }, - "ollama/deepseek-v3.1:671b-cloud": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 163840, - "max_output_tokens": 163840, - "max_tokens": 163840, + "o1-preview-2024-09-12": { + "cache_read_input_token_cost": 0.0000075, + "input_cost_per_token": 0.000015, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 0.00006, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": true }, - "ollama/gpt-oss:120b-cloud": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 131072, - "max_output_tokens": 131072, - "max_tokens": 131072, + "o3": { + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_flex": 2.5e-7, + "cache_read_input_token_cost_priority": 8.75e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_flex": 0.000001, + "input_cost_per_token_priority": 0.0000035, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 0.000008, + "output_cost_per_token_flex": 0.000004, + "output_cost_per_token_priority": 0.000014, + "supported_endpoints": [ + "/v1/responses", + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true }, - "ollama/gpt-oss:20b-cloud": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 131072, - "max_output_tokens": 131072, - "max_tokens": 131072, + "o3-2025-04-16": { + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000002, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true - }, - "ollama/internlm2_5-20b-chat": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 32768, - "max_output_tokens": 8192, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 0.000008, + "supported_endpoints": [ + "/v1/responses", + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true }, - "ollama/llama2": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 4096, - "max_output_tokens": 4096, - "max_tokens": 4096, + "o3-mini": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0 + "output_cost_per_token": 0.0000044, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": false }, - "ollama/llama2:13b": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 4096, - "max_output_tokens": 4096, - "max_tokens": 4096, + "o3-mini-2025-01-31": { + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0 + "output_cost_per_token": 0.0000044, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": false }, - "ollama/llama2:70b": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 4096, - "max_output_tokens": 4096, - "max_tokens": 4096, + "o4-mini": { + "cache_read_input_token_cost": 2.75e-7, + "cache_read_input_token_cost_flex": 1.375e-7, + "cache_read_input_token_cost_priority": 5e-7, + "input_cost_per_token": 0.0000011, + "input_cost_per_token_flex": 5.5e-7, + "input_cost_per_token_priority": 0.000002, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0 + "output_cost_per_token": 0.0000044, + "output_cost_per_token_flex": 0.0000022, + "output_cost_per_token_priority": 0.000008, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true }, - "ollama/llama2:7b": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 4096, - "max_output_tokens": 4096, - "max_tokens": 4096, + "o4-mini-2025-04-16": { + "cache_read_input_token_cost": 2.75e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0 + "output_cost_per_token": 0.0000044, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true }, - "ollama/llama3": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, + "oci/meta.llama-3.1-405b-instruct": { + "input_cost_per_token": 0.00001068, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0 + "output_cost_per_token": 0.00001068, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false }, - "ollama/llama3.1": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 32768, + "oci/meta.llama-3.2-90b-vision-instruct": { + "input_cost_per_token": 0.000002, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 0.000002, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false }, - "ollama/llama3:70b": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, + "oci/meta.llama-3.3-70b-instruct": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0 + "output_cost_per_token": 7.2e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false }, - "ollama/llama3:8b": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, + "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "max_input_tokens": 512000, + "max_output_tokens": 4000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0 + "output_cost_per_token": 7.2e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false }, - "ollama/mistral-7B-Instruct-v0.1": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, + "oci/meta.llama-4-scout-17b-16e-instruct": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "max_input_tokens": 192000, + "max_output_tokens": 4000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 7.2e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false }, - "ollama/mistral-7B-Instruct-v0.2": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 32768, - "max_output_tokens": 32768, - "max_tokens": 32768, + "oci/xai.grok-3": { + "input_cost_per_token": 0.000003, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 0.000015, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false }, - "ollama/mistral-large-instruct-2407": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 65536, - "max_output_tokens": 8192, - "max_tokens": 65536, + "oci/xai.grok-3-fast": { + "input_cost_per_token": 0.000005, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true + "output_cost_per_token": 0.000025, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false }, - "ollama/mixtral-8x22B-Instruct-v0.1": { - "input_cost_per_token": 0, - "litellm_provider": "ollama", - "max_input_tokens": 65536, - "max_output_tokens": 65536, - "max_tokens": 65536, - "mode": "chat", + "oci/xai.grok-3-mini": { + "input_cost_per_token": 3e-7, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3-mini-fast": { + "input_cost_per_token": 6e-7, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.000004, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-4": { + "input_cost_per_token": 0.000003, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-latest": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-a-03-2025": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-plus-latest": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "ollama/codegeex4": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": false + }, + "ollama/deepseek-coder-v2-instruct": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-lite-instruct": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/deepseek-v3.1:671b-cloud": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/gpt-oss:120b-cloud": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/gpt-oss:20b-cloud": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/internlm2_5-20b-chat": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/llama2": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0 + }, + "ollama/llama2:13b": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0 + }, + "ollama/llama2:70b": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0 + }, + "ollama/llama2:7b": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0 + }, + "ollama/llama3": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0 + }, + "ollama/llama3.1": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/llama3:70b": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0 + }, + "ollama/llama3:8b": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0 + }, + "ollama/mistral-7B-Instruct-v0.1": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/mistral-7B-Instruct-v0.2": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/mistral-large-instruct-2407": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true + }, + "ollama/mixtral-8x22B-Instruct-v0.1": { + "input_cost_per_token": 0, + "litellm_provider": "ollama", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", "output_cost_per_token": 0, "supports_function_calling": true }, @@ -17500,39 +20810,9 @@ "output_cost_per_token": 2e-7, "supports_system_messages": true }, - "openrouter/anthropic/claude-2": { - "input_cost_per_token": 0.00001102, - "litellm_provider": "openrouter", - "max_output_tokens": 8191, - "max_tokens": 100000, - "mode": "chat", - "output_cost_per_token": 0.00003268, - "supports_tool_choice": true - }, - "openrouter/anthropic/claude-3-5-haiku": { - "input_cost_per_token": 0.000001, - "litellm_provider": "openrouter", - "max_tokens": 200000, - "mode": "chat", - "output_cost_per_token": 0.000005, - "supports_function_calling": true, - "supports_tool_choice": true - }, - "openrouter/anthropic/claude-3-5-haiku-20241022": { - "input_cost_per_token": 0.000001, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0.000005, - "supports_function_calling": true, - "supports_tool_choice": true, - "tool_use_system_prompt_tokens": 264 - }, - "openrouter/anthropic/claude-3-haiku": { - "input_cost_per_image": 0.0004, - "input_cost_per_token": 2.5e-7, + "openrouter/anthropic/claude-3-haiku": { + "input_cost_per_image": 0.0004, + "input_cost_per_token": 2.5e-7, "litellm_provider": "openrouter", "max_tokens": 200000, "mode": "chat", @@ -17541,43 +20821,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/anthropic/claude-3-haiku-20240307": { - "input_cost_per_token": 2.5e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.00000125, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 264 - }, - "openrouter/anthropic/claude-3-opus": { - "input_cost_per_token": 0.000015, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.000075, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 395 - }, - "openrouter/anthropic/claude-3-sonnet": { - "input_cost_per_image": 0.0048, - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_tokens": 200000, - "mode": "chat", - "output_cost_per_token": 0.000015, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/anthropic/claude-3.5-sonnet": { "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", @@ -17593,20 +20836,6 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "openrouter/anthropic/claude-3.5-sonnet:beta": { - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0.000015, - "supports_computer_use": true, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 - }, "openrouter/anthropic/claude-3.7-sonnet": { "input_cost_per_image": 0.0048, "input_cost_per_token": 0.000003, @@ -17624,31 +20853,6 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "openrouter/anthropic/claude-3.7-sonnet:beta": { - "input_cost_per_image": 0.0048, - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 128000, - "max_tokens": 128000, - "mode": "chat", - "output_cost_per_token": 0.000015, - "supports_computer_use": true, - "supports_function_calling": true, - "supports_reasoning": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 - }, - "openrouter/anthropic/claude-instant-v1": { - "input_cost_per_token": 0.00000163, - "litellm_provider": "openrouter", - "max_output_tokens": 8191, - "max_tokens": 100000, - "mode": "chat", - "output_cost_per_token": 0.00000551, - "supports_tool_choice": true - }, "openrouter/anthropic/claude-opus-4": { "input_cost_per_image": 0.0048, "cache_creation_input_token_cost": 0.00001875, @@ -17714,6 +20918,30 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "openrouter/anthropic/claude-sonnet-4.6": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "source": "https://openrouter.ai/anthropic/claude-sonnet-4.6", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "openrouter/anthropic/claude-opus-4.5": { "cache_creation_input_token_cost": 0.00000625, "cache_read_input_token_cost": 5e-7, @@ -17733,6 +20961,25 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "openrouter/anthropic/claude-opus-4.6": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "openrouter/anthropic/claude-sonnet-4.5": { "input_cost_per_image": 0.0048, "cache_creation_input_token_cost": 0.00000375, @@ -17787,30 +21034,6 @@ "source": "https://openrouter.ai/api/v1/models/bytedance/ui-tars-1.5-7b", "supports_tool_choice": true }, - "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": { - "input_cost_per_token": 5e-7, - "litellm_provider": "openrouter", - "max_tokens": 32769, - "mode": "chat", - "output_cost_per_token": 5e-7, - "supports_tool_choice": true - }, - "openrouter/cohere/command-r-plus": { - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_tokens": 128000, - "mode": "chat", - "output_cost_per_token": 0.000015, - "supports_tool_choice": true - }, - "openrouter/databricks/dbrx-instruct": { - "input_cost_per_token": 6e-7, - "litellm_provider": "openrouter", - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 6e-7, - "supports_tool_choice": true - }, "openrouter/deepseek/deepseek-chat": { "input_cost_per_token": 1.4e-7, "litellm_provider": "openrouter", @@ -17839,7 +21062,7 @@ "litellm_provider": "openrouter", "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 8192, + "max_tokens": 163840, "mode": "chat", "output_cost_per_token": 8e-7, "supports_assistant_prefill": true, @@ -17854,7 +21077,7 @@ "litellm_provider": "openrouter", "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 8192, + "max_tokens": 163840, "mode": "chat", "output_cost_per_token": 4e-7, "supports_assistant_prefill": true, @@ -17869,7 +21092,7 @@ "litellm_provider": "openrouter", "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 8192, + "max_tokens": 163840, "mode": "chat", "output_cost_per_token": 4e-7, "supports_assistant_prefill": true, @@ -17878,17 +21101,6 @@ "supports_reasoning": false, "supports_tool_choice": true }, - "openrouter/deepseek/deepseek-coder": { - "input_cost_per_token": 1.4e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 66000, - "max_output_tokens": 4096, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 2.8e-7, - "supports_prompt_caching": true, - "supports_tool_choice": true - }, "openrouter/deepseek/deepseek-r1": { "input_cost_per_token": 5.5e-7, "input_cost_per_token_cache_hit": 1.4e-7, @@ -17919,15 +21131,8 @@ "supports_reasoning": true, "supports_tool_choice": true }, - "openrouter/fireworks/firellava-13b": { - "input_cost_per_token": 2e-7, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 2e-7, - "supports_tool_choice": true - }, "openrouter/google/gemini-2.0-flash-001": { + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, "litellm_provider": "openrouter", @@ -18040,62 +21245,94 @@ "supports_vision": true, "supports_web_search": true }, - "openrouter/google/gemini-pro-1.5": { - "input_cost_per_image": 0.00265, - "input_cost_per_token": 0.0000025, + "openrouter/google/gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-8, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 5e-7, "litellm_provider": "openrouter", - "max_input_tokens": 1000000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, "mode": "chat", - "output_cost_per_token": 0.0000075, + "output_cost_per_reasoning_token": 0.000003, + "output_cost_per_token": 0.000003, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, "supports_tool_choice": true, - "supports_vision": true + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 }, - "openrouter/google/gemini-pro-vision": { - "input_cost_per_image": 0.0025, - "input_cost_per_token": 1.25e-7, + "openrouter/google/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, "litellm_provider": "openrouter", - "max_tokens": 45875, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 3.75e-7, + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "source": "https://openrouter.ai/google/gemini-3.1-pro-preview", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, "supports_tool_choice": true, "supports_vision": true }, - "openrouter/google/palm-2-chat-bison": { - "input_cost_per_token": 5e-7, + "openrouter/gryphe/mythomax-l2-13b": { + "input_cost_per_token": 0.000001875, "litellm_provider": "openrouter", - "max_tokens": 25804, - "mode": "chat", - "output_cost_per_token": 5e-7, - "supports_tool_choice": true - }, - "openrouter/google/palm-2-codechat-bison": { - "input_cost_per_token": 5e-7, - "litellm_provider": "openrouter", - "max_tokens": 20070, - "mode": "chat", - "output_cost_per_token": 5e-7, - "supports_tool_choice": true - }, - "openrouter/gryphe/mythomax-l2-13b": { - "input_cost_per_token": 0.000001875, - "litellm_provider": "openrouter", - "max_tokens": 8192, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 0.000001875, "supports_tool_choice": true }, - "openrouter/jondurbin/airoboros-l2-70b-2.1": { - "input_cost_per_token": 0.000013875, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.000013875, - "supports_tool_choice": true - }, "openrouter/mancer/weaver": { "input_cost_per_token": 0.000005625, "litellm_provider": "openrouter", @@ -18104,30 +21341,6 @@ "output_cost_per_token": 0.000005625, "supports_tool_choice": true }, - "openrouter/meta-llama/codellama-34b-instruct": { - "input_cost_per_token": 5e-7, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 5e-7, - "supports_tool_choice": true - }, - "openrouter/meta-llama/llama-2-13b-chat": { - "input_cost_per_token": 2e-7, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 2e-7, - "supports_tool_choice": true - }, - "openrouter/meta-llama/llama-2-70b-chat": { - "input_cost_per_token": 0.0000015, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.0000015, - "supports_tool_choice": true - }, "openrouter/meta-llama/llama-3-70b-instruct": { "input_cost_per_token": 5.9e-7, "litellm_provider": "openrouter", @@ -18136,72 +21349,26 @@ "output_cost_per_token": 7.9e-7, "supports_tool_choice": true }, - "openrouter/meta-llama/llama-3-70b-instruct:nitro": { - "input_cost_per_token": 9e-7, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 9e-7, - "supports_tool_choice": true - }, - "openrouter/meta-llama/llama-3-8b-instruct:extended": { - "input_cost_per_token": 2.25e-7, - "litellm_provider": "openrouter", - "max_tokens": 16384, - "mode": "chat", - "output_cost_per_token": 0.00000225, - "supports_tool_choice": true - }, - "openrouter/meta-llama/llama-3-8b-instruct:free": { - "input_cost_per_token": 0, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0, - "supports_tool_choice": true - }, - "openrouter/microsoft/wizardlm-2-8x22b:nitro": { - "input_cost_per_token": 0.000001, - "litellm_provider": "openrouter", - "max_tokens": 65536, - "mode": "chat", - "output_cost_per_token": 0.000001, - "supports_tool_choice": true - }, "openrouter/minimax/minimax-m2": { "input_cost_per_token": 2.55e-7, "litellm_provider": "openrouter", "max_input_tokens": 204800, "max_output_tokens": 204800, - "max_tokens": 32768, + "max_tokens": 204800, "mode": "chat", "output_cost_per_token": 0.00000102, "supports_function_calling": true, - "supports_prompt_caching": false, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true }, - "openrouter/mistralai/devstral-2512:free": { - "input_cost_per_image": 0, - "input_cost_per_token": 0, - "litellm_provider": "openrouter", - "max_input_tokens": 262144, - "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true, - "supports_prompt_caching": false, - "supports_tool_choice": true, - "supports_vision": false - }, "openrouter/mistralai/devstral-2512": { "input_cost_per_image": 0, "input_cost_per_token": 1.5e-7, "litellm_provider": "openrouter", "max_input_tokens": 262144, "max_output_tokens": 65536, - "max_tokens": 262144, + "max_tokens": 65536, "mode": "chat", "output_cost_per_token": 6e-7, "supports_function_calling": true, @@ -18273,14 +21440,6 @@ "output_cost_per_token": 1.3e-7, "supports_tool_choice": true }, - "openrouter/mistralai/mistral-7b-instruct:free": { - "input_cost_per_token": 0, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0, - "supports_tool_choice": true - }, "openrouter/mistralai/mistral-large": { "input_cost_per_token": 0.000008, "litellm_provider": "openrouter", @@ -18313,13 +21472,20 @@ "output_cost_per_token": 6.5e-7, "supports_tool_choice": true }, - "openrouter/nousresearch/nous-hermes-llama2-13b": { - "input_cost_per_token": 2e-7, + "openrouter/moonshotai/kimi-k2.5": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 6e-7, "litellm_provider": "openrouter", - "max_tokens": 4096, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 2e-7, - "supports_tool_choice": true + "output_cost_per_token": 0.000003, + "source": "https://openrouter.ai/moonshotai/kimi-k2.5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true }, "openrouter/openai/gpt-3.5-turbo": { "input_cost_per_token": 0.0000015, @@ -18345,17 +21511,6 @@ "output_cost_per_token": 0.00006, "supports_tool_choice": true }, - "openrouter/openai/gpt-4-vision-preview": { - "input_cost_per_image": 0.01445, - "input_cost_per_token": 0.00001, - "litellm_provider": "openrouter", - "max_tokens": 130000, - "mode": "chat", - "output_cost_per_token": 0.00003, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/openai/gpt-4.1": { "cache_read_input_token_cost": 5e-7, "input_cost_per_token": 0.000002, @@ -18373,23 +21528,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/openai/gpt-4.1-2025-04-14": { - "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 0.000002, - "litellm_provider": "openrouter", - "max_input_tokens": 1047576, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 0.000008, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/openai/gpt-4.1-mini": { "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 4e-7, @@ -18407,23 +21545,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/openai/gpt-4.1-mini-2025-04-14": { - "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 4e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 1047576, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 0.0000016, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/openai/gpt-4.1-nano": { "cache_read_input_token_cost": 2.5e-8, "input_cost_per_token": 1e-7, @@ -18441,23 +21562,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/openai/gpt-4.1-nano-2025-04-14": { - "cache_read_input_token_cost": 2.5e-8, - "input_cost_per_token": 1e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 1047576, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 4e-7, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/openai/gpt-4o": { "input_cost_per_token": 0.0000025, "litellm_provider": "openrouter", @@ -18488,9 +21592,9 @@ "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, "litellm_provider": "openrouter", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, "supported_modalities": [ @@ -18522,6 +21626,25 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "openrouter/openai/gpt-5.2-codex": { + "cache_read_input_token_cost": 1.75e-7, + "input_cost_per_token": 0.00000175, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000014, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_reasoning": true, + "supports_tool_choice": true + }, "openrouter/openai/gpt-5": { "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, @@ -18579,14 +21702,37 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "openrouter/openai/gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openrouter", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "source": "https://openrouter.ai/openai/gpt-5.1-codex-max", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, "openrouter/openai/gpt-5.2": { "input_cost_per_image": 0, "cache_read_input_token_cost": 1.75e-7, "input_cost_per_token": 0.00000175, "litellm_provider": "openrouter", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, - "max_tokens": 400000, + "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000014, "supports_function_calling": true, @@ -18602,7 +21748,7 @@ "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 16384, - "max_tokens": 128000, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.000014, "supports_function_calling": true, @@ -18614,9 +21760,9 @@ "input_cost_per_image": 0, "input_cost_per_token": 0.000021, "litellm_provider": "openrouter", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, - "max_tokens": 400000, + "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000168, "supports_function_calling": true, @@ -18641,13 +21787,13 @@ "supports_tool_choice": true }, "openrouter/openai/gpt-oss-20b": { - "input_cost_per_token": 1.8e-7, + "input_cost_per_token": 2e-8, "litellm_provider": "openrouter", "max_input_tokens": 131072, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 8e-7, + "output_cost_per_token": 1e-7, "source": "https://openrouter.ai/openai/gpt-oss-20b", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -18672,126 +21818,104 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/openai/o1-mini": { - "input_cost_per_token": 0.000003, + "openrouter/openai/o3-mini": { + "input_cost_per_token": 0.0000011, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.000012, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": false }, - "openrouter/openai/o1-mini-2024-09-12": { - "input_cost_per_token": 0.000003, + "openrouter/openai/o3-mini-high": { + "input_cost_per_token": 0.0000011, "litellm_provider": "openrouter", "max_input_tokens": 128000, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.000012, + "output_cost_per_token": 0.0000044, "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": false }, - "openrouter/openai/o1-preview": { - "input_cost_per_token": 0.000015, + "openrouter/qwen/qwen-2.5-coder-32b-instruct": { + "input_cost_per_token": 1.8e-7, "litellm_provider": "openrouter", - "max_input_tokens": 128000, - "max_output_tokens": 32768, - "max_tokens": 32768, + "max_input_tokens": 33792, + "max_output_tokens": 33792, + "max_tokens": 33792, "mode": "chat", - "output_cost_per_token": 0.00006, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_tool_choice": true, - "supports_vision": false + "output_cost_per_token": 1.8e-7, + "supports_tool_choice": true }, - "openrouter/openai/o1-preview-2024-09-12": { - "input_cost_per_token": 0.000015, + "openrouter/qwen/qwen-vl-plus": { + "input_cost_per_token": 2.1e-7, "litellm_provider": "openrouter", - "max_input_tokens": 128000, - "max_output_tokens": 32768, - "max_tokens": 32768, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 0.00006, - "supports_function_calling": true, - "supports_parallel_function_calling": true, + "output_cost_per_token": 6.3e-7, "supports_tool_choice": true, - "supports_vision": false + "supports_vision": true }, - "openrouter/openai/o3-mini": { - "input_cost_per_token": 0.0000011, + "openrouter/qwen/qwen3-coder": { + "input_cost_per_token": 2.2e-7, "litellm_provider": "openrouter", - "max_input_tokens": 128000, - "max_output_tokens": 65536, - "max_tokens": 65536, + "max_input_tokens": 262100, + "max_output_tokens": 262100, + "max_tokens": 262100, "mode": "chat", - "output_cost_per_token": 0.0000044, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_reasoning": true, + "output_cost_per_token": 9.5e-7, + "source": "https://openrouter.ai/qwen/qwen3-coder", "supports_tool_choice": true, - "supports_vision": false + "supports_function_calling": true }, - "openrouter/openai/o3-mini-high": { - "input_cost_per_token": 0.0000011, + "openrouter/qwen/qwen3-coder-plus": { + "input_cost_per_token": 0.000001, "litellm_provider": "openrouter", - "max_input_tokens": 128000, + "max_input_tokens": 997952, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.0000044, + "output_cost_per_token": 0.000005, + "source": "https://openrouter.ai/qwen/qwen3-coder-plus", "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true, - "supports_vision": false - }, - "openrouter/pygmalionai/mythalion-13b": { - "input_cost_per_token": 0.000001875, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.000001875, "supports_tool_choice": true }, - "openrouter/qwen/qwen-2.5-coder-32b-instruct": { - "input_cost_per_token": 1.8e-7, + "openrouter/qwen/qwen3-235b-a22b-2507": { + "input_cost_per_token": 7.1e-8, "litellm_provider": "openrouter", - "max_input_tokens": 33792, - "max_output_tokens": 33792, - "max_tokens": 33792, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 1.8e-7, + "output_cost_per_token": 1e-7, + "source": "https://openrouter.ai/qwen/qwen3-235b-a22b-2507", + "supports_function_calling": true, "supports_tool_choice": true }, - "openrouter/qwen/qwen-vl-plus": { - "input_cost_per_token": 2.1e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 8192, - "max_output_tokens": 2048, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 6.3e-7, - "supports_tool_choice": true, - "supports_vision": true - }, - "openrouter/qwen/qwen3-coder": { - "input_cost_per_token": 2.2e-7, + "openrouter/qwen/qwen3-235b-a22b-thinking-2507": { + "input_cost_per_token": 1.1e-7, "litellm_provider": "openrouter", - "max_input_tokens": 262100, - "max_output_tokens": 262100, - "max_tokens": 262100, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 9.5e-7, - "source": "https://openrouter.ai/qwen/qwen3-coder", - "supports_tool_choice": true, - "supports_function_calling": true + "output_cost_per_token": 6e-7, + "source": "https://openrouter.ai/qwen/qwen3-235b-a22b-thinking-2507", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true }, "openrouter/switchpoint/router": { "input_cost_per_token": 8.5e-7, @@ -18826,30 +21950,17 @@ "supports_tool_choice": true, "supports_web_search": true }, - "openrouter/x-ai/grok-4-fast:free": { - "input_cost_per_token": 0, - "litellm_provider": "openrouter", - "max_input_tokens": 2000000, - "max_output_tokens": 30000, - "max_tokens": 2000000, - "mode": "chat", - "output_cost_per_token": 0, - "source": "https://openrouter.ai/x-ai/grok-4-fast:free", - "supports_function_calling": true, - "supports_reasoning": true, - "supports_tool_choice": true, - "supports_web_search": false - }, "openrouter/z-ai/glm-4.6": { "input_cost_per_token": 4e-7, "litellm_provider": "openrouter", "max_input_tokens": 202800, "max_output_tokens": 131000, - "max_tokens": 202800, + "max_tokens": 131000, "mode": "chat", "output_cost_per_token": 0.00000175, "source": "https://openrouter.ai/z-ai/glm-4.6", "supports_function_calling": true, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true }, @@ -18858,14 +21969,147 @@ "litellm_provider": "openrouter", "max_input_tokens": 202800, "max_output_tokens": 131000, - "max_tokens": 202800, + "max_tokens": 131000, "mode": "chat", "output_cost_per_token": 0.0000019, "source": "https://openrouter.ai/z-ai/glm-4.6:exacto", "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/xiaomi/mimo-v2-flash": { + "input_cost_per_token": 9e-8, + "output_cost_per_token": 2.9e-7, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": false, + "supports_prompt_caching": false + }, + "openrouter/z-ai/glm-4.7": { + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000015, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 202752, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false, + "supports_assistant_prefill": true + }, + "openrouter/z-ai/glm-4.7-flash": { + "input_cost_per_token": 7e-8, + "output_cost_per_token": 4e-7, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false + }, + "openrouter/z-ai/glm-5": { + "input_cost_per_token": 8e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 202752, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00000256, + "source": "https://openrouter.ai/z-ai/glm-5", + "supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true }, + "openrouter/minimax/minimax-m2.1": { + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.0000012, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 204000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false, + "supports_computer_use": false + }, + "openrouter/minimax/minimax-m2.5": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000011, + "cache_read_input_token_cost": 1.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 196608, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://openrouter.ai/minimax/minimax-m2.5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": false, + "supports_prompt_caching": true, + "supports_computer_use": false + }, + "openrouter/openrouter/auto": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_audio_input": true, + "supports_video_input": true + }, + "openrouter/openrouter/free": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_vision": true + }, + "openrouter/openrouter/bodybuilder": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat" + }, "ovhcloud/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 6.7e-7, "litellm_provider": "ovhcloud", @@ -19358,31 +22602,31 @@ "litellm_provider": "publicai", "max_input_tokens": 8192, "max_output_tokens": 4096, - "max_tokens": 8192, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", - "supports_function_calling": true, - "supports_tool_choice": true + "supports_function_calling": false, + "supports_tool_choice": false }, "publicai/swiss-ai/apertus-70b-instruct": { "input_cost_per_token": 0, "litellm_provider": "publicai", "max_input_tokens": 8192, "max_output_tokens": 4096, - "max_tokens": 8192, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", - "supports_function_calling": true, - "supports_tool_choice": true + "supports_function_calling": false, + "supports_tool_choice": false }, "publicai/aisingapore/Gemma-SEA-LION-v4-27B-IT": { "input_cost_per_token": 0, "litellm_provider": "publicai", "max_input_tokens": 8192, "max_output_tokens": 4096, - "max_tokens": 8192, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", @@ -19394,7 +22638,7 @@ "litellm_provider": "publicai", "max_input_tokens": 16384, "max_output_tokens": 4096, - "max_tokens": 16384, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", @@ -19406,7 +22650,7 @@ "litellm_provider": "publicai", "max_input_tokens": 8192, "max_output_tokens": 4096, - "max_tokens": 8192, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", @@ -19418,7 +22662,7 @@ "litellm_provider": "publicai", "max_input_tokens": 32768, "max_output_tokens": 4096, - "max_tokens": 32768, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", @@ -19430,7 +22674,7 @@ "litellm_provider": "publicai", "max_input_tokens": 32768, "max_output_tokens": 4096, - "max_tokens": 32768, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", @@ -19442,7 +22686,7 @@ "litellm_provider": "publicai", "max_input_tokens": 32768, "max_output_tokens": 4096, - "max_tokens": 32768, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", @@ -19455,7 +22699,7 @@ "litellm_provider": "publicai", "max_input_tokens": 32768, "max_output_tokens": 4096, - "max_tokens": 32768, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0, "source": "https://platform.publicai.co/docs", @@ -19468,7 +22712,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 262000, "max_output_tokens": 65536, - "max_tokens": 262144, + "max_tokens": 65536, "mode": "chat", "output_cost_per_token": 0.0000018, "supports_function_calling": true, @@ -19480,7 +22724,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 262144, "max_output_tokens": 131072, - "max_tokens": 262144, + "max_tokens": 131072, "mode": "chat", "output_cost_per_token": 8.8e-7, "supports_function_calling": true, @@ -19492,7 +22736,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 262144, "max_output_tokens": 131072, - "max_tokens": 262144, + "max_tokens": 131072, "mode": "chat", "output_cost_per_token": 6e-7, "supports_function_calling": true, @@ -19504,7 +22748,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 131072, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 6e-7, "supports_function_calling": true, @@ -19534,6 +22778,19 @@ "supports_system_messages": true, "supports_vision": true }, + "qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "replicate/meta/llama-2-13b": { "input_cost_per_token": 1e-7, "litellm_provider": "replicate", @@ -19664,12 +22921,306 @@ "output_cost_per_token": 0.000001, "supports_tool_choice": true }, - "sagemaker/meta-textgeneration-llama-2-13b-f": { - "input_cost_per_token": 0, - "litellm_provider": "sagemaker", - "max_input_tokens": 4096, - "max_output_tokens": 4096, - "max_tokens": 4096, + "replicate/openai/gpt-5": { + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicateopenai/gpt-oss-20b": { + "input_cost_per_token": 9e-8, + "output_cost_per_token": 3.6e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-4.5-haiku": { + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/ibm-granite/granite-3.3-8b-instruct": { + "input_cost_per_token": 3e-8, + "output_cost_per_token": 2.5e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4o": { + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_audio_input": true, + "supports_audio_output": true + }, + "replicate/openai/o4-mini": { + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000004, + "output_cost_per_reasoning_token": 0.000004, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/o1-mini": { + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "output_cost_per_reasoning_token": 0.0000044, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/o1": { + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "output_cost_per_reasoning_token": 0.00006, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4o-mini": { + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/qwen/qwen3-235b-a22b-instruct-2507": { + "input_cost_per_token": 2.64e-7, + "output_cost_per_token": 0.00000106, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-4-sonnet": { + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/deepseek-ai/deepseek-v3": { + "input_cost_per_token": 0.00000145, + "output_cost_per_token": 0.00000145, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-3.7-sonnet": { + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/anthropic/claude-3.5-haiku": { + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/anthropic/claude-3.5-sonnet": { + "input_cost_per_token": 0.00000375, + "output_cost_per_token": 0.00001875, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/google/gemini-3-pro": { + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000012, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/anthropic/claude-4.5-sonnet": { + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/openai/gpt-4.1": { + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-4.1-nano": { + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4.1-mini": { + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-5-nano": { + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-5-mini": { + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/google/gemini-2.5-flash": { + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.0000025, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-oss-120b": { + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 7.2e-7, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/deepseek-ai/deepseek-v3.1": { + "input_cost_per_token": 6.72e-7, + "output_cost_per_token": 0.000002016, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/xai/grok-4": { + "input_cost_per_token": 0.0000072, + "output_cost_per_token": 0.000036, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/deepseek-ai/deepseek-r1": { + "input_cost_per_token": 0.00000375, + "output_cost_per_token": 0.00001, + "output_cost_per_reasoning_token": 0.00001, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_reasoning": true, + "supports_system_messages": true + }, + "sagemaker/meta-textgeneration-llama-2-13b-f": { + "input_cost_per_token": 0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0 }, @@ -19890,7 +23441,7 @@ "litellm_provider": "snowflake", "max_input_tokens": 18000, "max_output_tokens": 8192, - "max_tokens": 18000, + "max_tokens": 8192, "mode": "chat", "supports_computer_use": true }, @@ -19898,7 +23449,7 @@ "litellm_provider": "snowflake", "max_input_tokens": 32768, "max_output_tokens": 8192, - "max_tokens": 32768, + "max_tokens": 8192, "mode": "chat", "supports_reasoning": true }, @@ -19906,154 +23457,154 @@ "litellm_provider": "snowflake", "max_input_tokens": 8000, "max_output_tokens": 8192, - "max_tokens": 8000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/jamba-1.5-large": { "litellm_provider": "snowflake", "max_input_tokens": 256000, "max_output_tokens": 8192, - "max_tokens": 256000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/jamba-1.5-mini": { "litellm_provider": "snowflake", "max_input_tokens": 256000, "max_output_tokens": 8192, - "max_tokens": 256000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/jamba-instruct": { "litellm_provider": "snowflake", "max_input_tokens": 256000, "max_output_tokens": 8192, - "max_tokens": 256000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama2-70b-chat": { "litellm_provider": "snowflake", "max_input_tokens": 4096, "max_output_tokens": 8192, - "max_tokens": 4096, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama3-70b": { "litellm_provider": "snowflake", "max_input_tokens": 8000, "max_output_tokens": 8192, - "max_tokens": 8000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama3-8b": { "litellm_provider": "snowflake", "max_input_tokens": 8000, "max_output_tokens": 8192, - "max_tokens": 8000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama3.1-405b": { "litellm_provider": "snowflake", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama3.1-70b": { "litellm_provider": "snowflake", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama3.1-8b": { "litellm_provider": "snowflake", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama3.2-1b": { "litellm_provider": "snowflake", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama3.2-3b": { "litellm_provider": "snowflake", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/llama3.3-70b": { "litellm_provider": "snowflake", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/mistral-7b": { "litellm_provider": "snowflake", "max_input_tokens": 32000, "max_output_tokens": 8192, - "max_tokens": 32000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/mistral-large": { "litellm_provider": "snowflake", "max_input_tokens": 32000, "max_output_tokens": 8192, - "max_tokens": 32000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/mistral-large2": { "litellm_provider": "snowflake", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/mixtral-8x7b": { "litellm_provider": "snowflake", "max_input_tokens": 32000, "max_output_tokens": 8192, - "max_tokens": 32000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/reka-core": { "litellm_provider": "snowflake", "max_input_tokens": 32000, "max_output_tokens": 8192, - "max_tokens": 32000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/reka-flash": { "litellm_provider": "snowflake", "max_input_tokens": 100000, "max_output_tokens": 8192, - "max_tokens": 100000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/snowflake-arctic": { "litellm_provider": "snowflake", "max_input_tokens": 4096, "max_output_tokens": 8192, - "max_tokens": 4096, + "max_tokens": 8192, "mode": "chat" }, "snowflake/snowflake-llama-3.1-405b": { "litellm_provider": "snowflake", "max_input_tokens": 8000, "max_output_tokens": 8192, - "max_tokens": 8000, + "max_tokens": 8192, "mode": "chat" }, "snowflake/snowflake-llama-3.3-70b": { "litellm_provider": "snowflake", "max_input_tokens": 8000, "max_output_tokens": 8192, - "max_tokens": 8000, + "max_tokens": 8192, "mode": "chat" }, "together-ai-21.1b-41b": { @@ -20098,6 +23649,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/Qwen/Qwen2.5-7B-Instruct-Turbo": { @@ -20105,6 +23657,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput": { @@ -20116,6 +23669,7 @@ "source": "https://www.together.ai/models/qwen3-235b-a22b-instruct-2507-fp8", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/Qwen/Qwen3-235B-A22B-Thinking-2507": { @@ -20127,6 +23681,7 @@ "source": "https://www.together.ai/models/qwen3-235b-a22b-thinking-2507", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/Qwen/Qwen3-235B-A22B-fp8-tput": { @@ -20149,6 +23704,7 @@ "source": "https://www.together.ai/models/qwen3-coder-480b-a35b-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-R1": { @@ -20161,6 +23717,7 @@ "output_cost_per_token": 0.000007, "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-R1-0528-tput": { @@ -20172,6 +23729,7 @@ "source": "https://www.together.ai/models/deepseek-r1-0528-throughput", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-V3": { @@ -20184,6 +23742,7 @@ "output_cost_per_token": 0.00000125, "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-V3.1": { @@ -20203,6 +23762,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo": { @@ -20232,6 +23792,7 @@ "output_cost_per_token": 8.5e-7, "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/meta-llama/Llama-4-Scout-17B-16E-Instruct": { @@ -20241,6 +23802,7 @@ "output_cost_per_token": 5.9e-7, "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { @@ -20250,6 +23812,7 @@ "output_cost_per_token": 0.0000035, "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { @@ -20305,6 +23868,7 @@ "source": "https://www.together.ai/models/kimi-k2-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/openai/gpt-oss-120b": { @@ -20316,6 +23880,7 @@ "source": "https://www.together.ai/models/gpt-oss-120b", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/openai/gpt-oss-20b": { @@ -20327,6 +23892,7 @@ "source": "https://www.together.ai/models/gpt-oss-20b", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/togethercomputer/CodeLlama-34b-Instruct": { @@ -20345,6 +23911,7 @@ "source": "https://www.together.ai/models/glm-4-5-air", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/zai-org/GLM-4.6": { @@ -20361,6 +23928,34 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "together_ai/zai-org/GLM-4.7": { + "input_cost_per_token": 4.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.000002, + "source": "https://www.together.ai/models/glm-4-7", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "together_ai/moonshotai/Kimi-K2.5": { + "input_cost_per_token": 5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 0.0000028, + "source": "https://www.together.ai/models/kimi-k2-5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_reasoning": true + }, "together_ai/moonshotai/Kimi-K2-Instruct-0905": { "input_cost_per_token": 0.000001, "litellm_provider": "together_ai", @@ -20381,6 +23976,7 @@ "source": "https://www.together.ai/models/qwen3-next-80b-a3b-instruct", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "together_ai/Qwen/Qwen3-Next-80B-A3B-Thinking": { @@ -20392,6 +23988,19 @@ "source": "https://www.together.ai/models/qwen3-next-80b-a3b-thinking", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3.5-397B-A17B": { + "input_cost_per_token": 6e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "source": "https://www.together.ai/models/Qwen/Qwen3.5-397B-A17B", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true }, "us.amazon.nova-lite-v1:0": { @@ -20499,7 +24108,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { "cache_creation_input_token_cost": 0.00000375, @@ -20552,7 +24163,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-8, + "cache_creation_input_token_cost": 3.125e-7 }, "us.anthropic.claude-3-opus-20240229-v1:0": { "input_cost_per_token": 0.000015, @@ -20565,7 +24178,9 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 0.0000015, + "cache_creation_input_token_cost": 0.00001875 }, "us.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 0.000003, @@ -20579,7 +24194,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_token_cost": 0.00000375 }, "us.anthropic.claude-opus-4-1-20250805-v1:0": { "cache_creation_input_token_cost": 0.00001875, @@ -20685,15 +24302,15 @@ "tool_use_system_prompt_tokens": 159 }, "us.anthropic.claude-opus-4-5-20251101-v1:0": { - "cache_creation_input_token_cost": 0.00000625, - "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 0.000005, + "cache_creation_input_token_cost": 0.000006875, + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 0.0000055, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000025, + "output_cost_per_token": 0.0000275, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -20804,12 +24421,36 @@ "supports_reasoning": true, "supports_tool_choice": false }, + "us.deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "eu.deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "us.meta.llama3-1-405b-instruct-v1:0": { "input_cost_per_token": 0.00000532, "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000016, "supports_function_calling": true, @@ -20820,7 +24461,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 2048, - "max_tokens": 128000, + "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 9.9e-7, "supports_function_calling": true, @@ -20831,7 +24472,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 2048, - "max_tokens": 128000, + "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 2.2e-7, "supports_function_calling": true, @@ -20842,7 +24483,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 3.5e-7, "supports_function_calling": true, @@ -20854,7 +24495,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 1e-7, "supports_function_calling": true, @@ -20865,7 +24506,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 1.5e-7, "supports_function_calling": true, @@ -20876,7 +24517,7 @@ "litellm_provider": "bedrock", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000002, "supports_function_calling": true, @@ -20941,7 +24582,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000006, "supports_function_calling": true, @@ -20994,7 +24635,7 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 40960, "max_output_tokens": 16384, - "max_tokens": 40960, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 2.4e-7 }, @@ -21003,7 +24644,7 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 40960, "max_output_tokens": 16384, - "max_tokens": 40960, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 6e-7 }, @@ -21012,7 +24653,7 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 40960, "max_output_tokens": 16384, - "max_tokens": 40960, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 3e-7 }, @@ -21021,45 +24662,57 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 40960, "max_output_tokens": 16384, - "max_tokens": 40960, + "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 3e-7 + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/alibaba/qwen3-coder": { "input_cost_per_token": 4e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 262144, "max_output_tokens": 66536, - "max_tokens": 262144, + "max_tokens": 66536, "mode": "chat", - "output_cost_per_token": 0.0000016 + "output_cost_per_token": 0.0000016, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/amazon/nova-lite": { "input_cost_per_token": 6e-8, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 300000, "max_output_tokens": 8192, - "max_tokens": 300000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.4e-7 + "output_cost_per_token": 2.4e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/amazon/nova-micro": { "input_cost_per_token": 3.5e-8, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.4e-7 + "output_cost_per_token": 1.4e-7, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/amazon/nova-pro": { "input_cost_per_token": 8e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 300000, "max_output_tokens": 8192, - "max_tokens": 300000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0000032 + "output_cost_per_token": 0.0000032, + "supports_vision": true, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/amazon/titan-embed-text-v2": { "input_cost_per_token": 2e-8, @@ -21077,9 +24730,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 4096, - "max_tokens": 200000, + "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.00000125 + "output_cost_per_token": 0.00000125, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-3-opus": { "cache_creation_input_token_cost": 0.00001875, @@ -21088,9 +24745,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 4096, - "max_tokens": 200000, + "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.000075 + "output_cost_per_token": 0.000075, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-3.5-haiku": { "cache_creation_input_token_cost": 0.000001, @@ -21099,9 +24760,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 8192, - "max_tokens": 200000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.000004 + "output_cost_per_token": 0.000004, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-3.5-sonnet": { "cache_creation_input_token_cost": 0.00000375, @@ -21110,9 +24775,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 8192, - "max_tokens": 200000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-3.7-sonnet": { "cache_creation_input_token_cost": 0.00000375, @@ -21121,9 +24790,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-4-opus": { "cache_creation_input_token_cost": 0.00001875, @@ -21132,9 +24805,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 32000, - "max_tokens": 200000, + "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.000075 + "output_cost_per_token": 0.000075, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-4-sonnet": { "cache_creation_input_token_cost": 0.00000375, @@ -21143,36 +24820,232 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_function_calling": true, + "supports_tool_choice": true }, - "vercel_ai_gateway/cohere/command-a": { - "input_cost_per_token": 0.0000025, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", - "max_input_tokens": 256000, - "max_output_tokens": 8000, - "max_tokens": 256000, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true }, - "vercel_ai_gateway/cohere/command-r": { - "input_cost_per_token": 1.5e-7, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet-20241022": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", - "max_input_tokens": 128000, - "max_output_tokens": 4096, - "max_tokens": 128000, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 6e-7 + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-3-7-sonnet": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-haiku-4.5": { + "cache_creation_input_token_cost": 0.00000125, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4": { + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000075, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.1": { + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000075, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.6": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4.5": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/cohere/command-a": { + "input_cost_per_token": 0.0000025, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 256000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/cohere/command-r": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/cohere/command-r-plus": { "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/cohere/embed-v4.0": { "input_cost_per_token": 1.2e-7, @@ -21188,9 +25061,10 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.00000219 + "output_cost_per_token": 0.00000219, + "supports_tool_choice": true }, "vercel_ai_gateway/deepseek/deepseek-r1-distill-llama-70b": { "input_cost_per_token": 7.5e-7, @@ -21199,52 +25073,74 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 9.9e-7 + "output_cost_per_token": 9.9e-7, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/deepseek/deepseek-v3": { "input_cost_per_token": 9e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 9e-7 + "output_cost_per_token": 9e-7, + "supports_tool_choice": true }, "vercel_ai_gateway/google/gemini-2.0-flash": { + "deprecation_date": "2026-06-01", "input_cost_per_token": 1.5e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, "max_output_tokens": 8192, - "max_tokens": 1048576, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 6e-7 + "output_cost_per_token": 6e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/google/gemini-2.0-flash-lite": { + "deprecation_date": "2026-06-01", "input_cost_per_token": 7.5e-8, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, "max_output_tokens": 8192, - "max_tokens": 1048576, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3e-7 + "output_cost_per_token": 3e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/google/gemini-2.5-flash": { "input_cost_per_token": 3e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1000000, "max_output_tokens": 65536, - "max_tokens": 1000000, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.0000025 + "output_cost_per_token": 0.0000025, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/google/gemini-2.5-pro": { "input_cost_per_token": 0.0000025, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, "max_output_tokens": 65536, - "max_tokens": 1048576, + "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/google/gemma-2-9b": { "input_cost_per_token": 2e-7, @@ -21253,14 +25149,17 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-7 + "output_cost_per_token": 2e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/inception/mercury-coder-small": { "input_cost_per_token": 2.5e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32000, "max_output_tokens": 16384, - "max_tokens": 32000, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.000001 }, @@ -21271,7 +25170,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.9e-7 + "output_cost_per_token": 7.9e-7, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3-8b": { "input_cost_per_token": 5e-8, @@ -21280,41 +25180,48 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 8e-8 + "output_cost_per_token": 8e-8, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3.1-70b": { "input_cost_per_token": 7.2e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.2e-7 + "output_cost_per_token": 7.2e-7, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3.1-8b": { "input_cost_per_token": 5e-8, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131000, "max_output_tokens": 131072, - "max_tokens": 131000, + "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 8e-8 + "output_cost_per_token": 8e-8, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/meta/llama-3.2-11b": { "input_cost_per_token": 1.6e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.6e-7 + "output_cost_per_token": 1.6e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3.2-1b": { "input_cost_per_token": 1e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 1e-7 }, @@ -21323,54 +25230,67 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-7 + "output_cost_per_token": 1.5e-7, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/meta/llama-3.2-90b": { "input_cost_per_token": 7.2e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.2e-7 + "output_cost_per_token": 7.2e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3.3-70b": { "input_cost_per_token": 7.2e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 8192, - "max_tokens": 128000, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.2e-7 + "output_cost_per_token": 7.2e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-4-maverick": { "input_cost_per_token": 2e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 8192, - "max_tokens": 131072, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 6e-7 + "output_cost_per_token": 6e-7, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-4-scout": { "input_cost_per_token": 1e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 8192, - "max_tokens": 131072, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3e-7 + "output_cost_per_token": 3e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/codestral": { "input_cost_per_token": 3e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 256000, "max_output_tokens": 4000, - "max_tokens": 256000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 9e-7 + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/codestral-embed": { "input_cost_per_token": 1.5e-7, @@ -21388,43 +25308,55 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.8e-7 + "output_cost_per_token": 2.8e-7, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/mistral/magistral-medium": { "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 64000, - "max_tokens": 128000, + "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000005 + "output_cost_per_token": 0.000005, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/mistral/magistral-small": { "input_cost_per_token": 5e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 64000, - "max_tokens": 128000, + "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.0000015 + "output_cost_per_token": 0.0000015, + "supports_function_calling": true }, "vercel_ai_gateway/mistral/ministral-3b": { "input_cost_per_token": 4e-8, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4000, - "max_tokens": 128000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 4e-8 + "output_cost_per_token": 4e-8, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/ministral-8b": { "input_cost_per_token": 1e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4000, - "max_tokens": 128000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 1e-7 + "output_cost_per_token": 1e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/mistral-embed": { "input_cost_per_token": 1e-7, @@ -21440,9 +25372,11 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32000, "max_output_tokens": 4000, - "max_tokens": 32000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0.000006 + "output_cost_per_token": 0.000006, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/mistral-saba-24b": { "input_cost_per_token": 7.9e-7, @@ -21458,52 +25392,66 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32000, "max_output_tokens": 4000, - "max_tokens": 32000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 3e-7 + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/mistral/mixtral-8x22b-instruct": { "input_cost_per_token": 0.0000012, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 65536, "max_output_tokens": 2048, - "max_tokens": 65536, + "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 0.0000012 + "output_cost_per_token": 0.0000012, + "supports_function_calling": true }, "vercel_ai_gateway/mistral/pixtral-12b": { "input_cost_per_token": 1.5e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4000, - "max_tokens": 128000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 1.5e-7 + "output_cost_per_token": 1.5e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/mistral/pixtral-large": { "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4000, - "max_tokens": 128000, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0.000006 + "output_cost_per_token": 0.000006, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/moonshotai/kimi-k2": { "input_cost_per_token": 5.5e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 0.0000022 + "output_cost_per_token": 0.0000022, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/morph/morph-v3-fast": { "input_cost_per_token": 8e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32768, "max_output_tokens": 16384, - "max_tokens": 32768, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.0000012 }, @@ -21512,7 +25460,7 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 32768, "max_output_tokens": 16384, - "max_tokens": 32768, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.0000019 }, @@ -21521,16 +25469,18 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 16385, "max_output_tokens": 4096, - "max_tokens": 16385, + "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0000015 + "output_cost_per_token": 0.0000015, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/openai/gpt-3.5-turbo-instruct": { "input_cost_per_token": 0.0000015, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 8192, "max_output_tokens": 4096, - "max_tokens": 8192, + "max_tokens": 4096, "mode": "chat", "output_cost_per_token": 0.000002 }, @@ -21539,9 +25489,12 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 4096, - "max_tokens": 128000, + "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.00003 + "output_cost_per_token": 0.00003, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/openai/gpt-4.1": { "cache_creation_input_token_cost": 0, @@ -21550,9 +25503,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1047576, "max_output_tokens": 32768, - "max_tokens": 1047576, + "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.000008 + "output_cost_per_token": 0.000008, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/gpt-4.1-mini": { "cache_creation_input_token_cost": 0, @@ -21561,9 +25518,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1047576, "max_output_tokens": 32768, - "max_tokens": 1047576, + "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0000016 + "output_cost_per_token": 0.0000016, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/gpt-4.1-nano": { "cache_creation_input_token_cost": 0, @@ -21572,9 +25533,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1047576, "max_output_tokens": 32768, - "max_tokens": 1047576, + "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 4e-7 + "output_cost_per_token": 4e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/gpt-4o": { "cache_creation_input_token_cost": 0, @@ -21583,9 +25548,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 16384, - "max_tokens": 128000, + "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/gpt-4o-mini": { "cache_creation_input_token_cost": 0, @@ -21594,9 +25563,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 16384, - "max_tokens": 128000, + "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 6e-7 + "output_cost_per_token": 6e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/o1": { "cache_creation_input_token_cost": 0, @@ -21605,9 +25578,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, - "max_tokens": 200000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.00006 + "output_cost_per_token": 0.00006, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/o3": { "cache_creation_input_token_cost": 0, @@ -21616,9 +25593,13 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, - "max_tokens": 200000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.000008 + "output_cost_per_token": 0.000008, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/o3-mini": { "cache_creation_input_token_cost": 0, @@ -21627,9 +25608,12 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, - "max_tokens": 200000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.0000044 + "output_cost_per_token": 0.0000044, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/o4-mini": { "cache_creation_input_token_cost": 0, @@ -21638,16 +25622,20 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 100000, - "max_tokens": 200000, + "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.0000044 + "output_cost_per_token": 0.0000044, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/perplexity/sonar": { "input_cost_per_token": 0.000001, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, - "max_tokens": 127000, + "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 0.000001 }, @@ -21656,7 +25644,7 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 200000, "max_output_tokens": 8000, - "max_tokens": 200000, + "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 0.000015 }, @@ -21665,7 +25653,7 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, - "max_tokens": 127000, + "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 0.000005 }, @@ -21674,7 +25662,7 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 127000, "max_output_tokens": 8000, - "max_tokens": 127000, + "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 0.000008 }, @@ -21683,27 +25671,35 @@ "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 32000, - "max_tokens": 128000, + "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/vercel/v0-1.5-md": { "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 32768, - "max_tokens": 128000, + "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-2": { "input_cost_per_token": 0.000002, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 131072, "max_output_tokens": 4000, - "max_tokens": 131072, + "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-2-vision": { "input_cost_per_token": 0.000002, @@ -21712,7 +25708,10 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-3": { "input_cost_per_token": 0.000003, @@ -21721,7 +25720,9 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-3-fast": { "input_cost_per_token": 0.000005, @@ -21730,7 +25731,8 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.000025 + "output_cost_per_token": 0.000025, + "supports_function_calling": true }, "vercel_ai_gateway/xai/grok-3-mini": { "input_cost_per_token": 3e-7, @@ -21739,7 +25741,9 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-7 + "output_cost_per_token": 5e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-3-mini-fast": { "input_cost_per_token": 6e-7, @@ -21748,7 +25752,9 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.000004 + "output_cost_per_token": 0.000004, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-4": { "input_cost_per_token": 0.000003, @@ -21757,7 +25763,9 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/zai/glm-4.5": { "input_cost_per_token": 6e-7, @@ -21766,16 +25774,20 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0000022 + "output_cost_per_token": 0.0000022, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/zai/glm-4.5-air": { "input_cost_per_token": 2e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 128000, "max_output_tokens": 96000, - "max_tokens": 128000, + "max_tokens": 96000, "mode": "chat", - "output_cost_per_token": 0.0000011 + "output_cost_per_token": 0.0000011, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/zai/glm-4.6": { "litellm_provider": "vercel_ai_gateway", @@ -21834,7 +25846,9 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_vision": true }, "vertex_ai/claude-3-5-sonnet": { "input_cost_per_token": 0.000003, @@ -22105,17 +26119,78 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_streaming": true }, - "vertex_ai/claude-sonnet-4-5": { - "cache_creation_input_token_cost": 0.00000375, - "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 0.000003, - "input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "input_cost_per_token_batches": 0.0000015, + "vertex_ai/claude-opus-4-6": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "vertex_ai/claude-opus-4-6@default": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "vertex_ai/claude-sonnet-4-5": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -22133,6 +26208,36 @@ "supports_tool_choice": true, "supports_vision": true }, + "vertex_ai/claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + } + }, "vertex_ai/claude-sonnet-4-5@20250929": { "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, @@ -22157,7 +26262,8 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "supports_native_streaming": true }, "vertex_ai/claude-opus-4@20250514": { "cache_creation_input_token_cost": 0.00001875, @@ -22327,7 +26433,7 @@ "litellm_provider": "vertex_ai-deepseek_models", "max_input_tokens": 163840, "max_output_tokens": 32768, - "max_tokens": 163840, + "max_tokens": 32768, "mode": "chat", "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", @@ -22346,7 +26452,7 @@ "litellm_provider": "vertex_ai-deepseek_models", "max_input_tokens": 163840, "max_output_tokens": 32768, - "max_tokens": 163840, + "max_tokens": 32768, "mode": "chat", "output_cost_per_token": 0.00000168, "output_cost_per_token_batches": 8.4e-7, @@ -22375,6 +26481,57 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "vertex_ai/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_per_audio_token": 5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true + }, "vertex_ai/jamba-1.5": { "input_cost_per_token": 2e-7, "litellm_provider": "vertex_ai-ai21_models", @@ -22430,7 +26587,7 @@ "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, - "max_tokens": 128000, + "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 0.000016, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", @@ -22443,7 +26600,7 @@ "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, - "max_tokens": 128000, + "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 0, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", @@ -22456,7 +26613,7 @@ "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, - "max_tokens": 128000, + "max_tokens": 2048, "metadata": { "notes": "VertexAI states that The Llama 3.1 API service for llama-3.1-70b-instruct-maas and llama-3.1-8b-instruct-maas are in public preview and at no cost." }, @@ -22472,7 +26629,7 @@ "litellm_provider": "vertex_ai-llama_models", "max_input_tokens": 128000, "max_output_tokens": 2048, - "max_tokens": 128000, + "max_tokens": 2048, "metadata": { "notes": "VertexAI states that The Llama 3.2 API service is at no cost during public preview, and will be priced as per dollar-per-1M-tokens at GA." }, @@ -22621,6 +26778,34 @@ "supports_tool_choice": true, "supports_web_search": true }, + "vertex_ai/zai-org/glm-4.7-maas": { + "input_cost_per_token": 6e-7, + "litellm_provider": "vertex_ai-zai_models", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000022, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/zai-org/glm-5-maas": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "vertex_ai-zai_models", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000032, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#glm-models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "vertex_ai/mistral-medium-3": { "input_cost_per_token": 4e-7, "litellm_provider": "vertex_ai-mistral_models", @@ -22785,6 +26970,9 @@ "mode": "chat", "output_cost_per_token": 0.000001, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": [ + "global" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -22797,6 +26985,9 @@ "mode": "chat", "output_cost_per_token": 0.000004, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": [ + "global" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -22809,6 +27000,9 @@ "mode": "chat", "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": [ + "global" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -22821,6 +27015,9 @@ "mode": "chat", "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": [ + "global" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -22955,7 +27152,7 @@ "litellm_provider": "watsonx", "max_input_tokens": 8192, "max_output_tokens": 1024, - "max_tokens": 8192, + "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 2e-7, "supports_audio_input": false, @@ -22973,7 +27170,7 @@ "litellm_provider": "watsonx", "max_input_tokens": 131072, "max_output_tokens": 16384, - "max_tokens": 131072, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, "supports_audio_input": false, @@ -23349,6 +27546,7 @@ "supports_web_search": true }, "xai/grok-2-vision-1212": { + "deprecation_date": "2026-02-28", "input_cost_per_image": 0.000002, "input_cost_per_token": 0.000002, "litellm_provider": "xai", @@ -23377,6 +27575,7 @@ "supports_web_search": true }, "xai/grok-3": { + "cache_read_input_token_cost": 7.5e-7, "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23391,6 +27590,7 @@ "supports_web_search": true }, "xai/grok-3-beta": { + "cache_read_input_token_cost": 7.5e-7, "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23405,6 +27605,7 @@ "supports_web_search": true }, "xai/grok-3-fast-beta": { + "cache_read_input_token_cost": 0.00000125, "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23419,6 +27620,7 @@ "supports_web_search": true }, "xai/grok-3-fast-latest": { + "cache_read_input_token_cost": 0.00000125, "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23433,6 +27635,7 @@ "supports_web_search": true }, "xai/grok-3-latest": { + "cache_read_input_token_cost": 7.5e-7, "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23447,6 +27650,8 @@ "supports_web_search": true }, "xai/grok-3-mini": { + "cache_read_input_token_cost": 7.5e-8, + "deprecation_date": "2026-02-28", "input_cost_per_token": 3e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23462,6 +27667,8 @@ "supports_web_search": true }, "xai/grok-3-mini-beta": { + "cache_read_input_token_cost": 7.5e-8, + "deprecation_date": "2026-02-28", "input_cost_per_token": 3e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23477,6 +27684,7 @@ "supports_web_search": true }, "xai/grok-3-mini-fast": { + "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 6e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23492,6 +27700,7 @@ "supports_web_search": true }, "xai/grok-3-mini-fast-beta": { + "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 6e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23507,6 +27716,7 @@ "supports_web_search": true }, "xai/grok-3-mini-fast-latest": { + "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 6e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23522,6 +27732,7 @@ "supports_web_search": true }, "xai/grok-3-mini-latest": { + "cache_read_input_token_cost": 7.5e-8, "input_cost_per_token": 3e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -23778,7 +27989,68 @@ "supports_vision": true, "supports_web_search": true }, + "zai.glm-4.7": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000022, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "zai/glm-5": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 2e-7, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.0000032, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-5-code": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.000005, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.7": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000022, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, "zai/glm-4.6": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 1.1e-7, "input_cost_per_token": 6e-7, "output_cost_per_token": 0.0000022, "litellm_provider": "zai", @@ -23786,6 +28058,8 @@ "max_output_tokens": 128000, "mode": "chat", "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, "supports_tool_choice": true, "source": "https://docs.z.ai/guides/overview/pricing" }, @@ -25747,5 +30021,1814 @@ "output_cost_per_token": 2e-7, "litellm_provider": "fireworks_ai", "mode": "chat" + }, + "novita/deepseek/deepseek-v3.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.69e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 163840, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.345e-7, + "input_cost_per_token_cache_hit": 1.345e-7, + "supports_reasoning": true + }, + "novita/minimax/minimax-m2.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000012, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token_cache_hit": 3e-8 + }, + "novita/zai-org/glm-4.7": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000022, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token_cache_hit": 1.1e-7, + "supports_reasoning": true + }, + "novita/xiaomimimo/mimo-v2-flash": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, + "max_input_tokens": 262144, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 2e-8, + "input_cost_per_token_cache_hit": 2e-8, + "supports_reasoning": true + }, + "novita/zai-org/autoglm-phone-9b-multilingual": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.5e-8, + "output_cost_per_token": 1.38e-7, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/moonshotai/kimi-k2-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000025, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/minimax/minimax-m2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000012, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token_cache_hit": 3e-8, + "supports_reasoning": true + }, + "novita/paddlepaddle/paddleocr-vl": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 2e-8, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-v3.2-exp": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 4.1e-7, + "max_input_tokens": 163840, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-235b-a22b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9.8e-7, + "output_cost_per_token": 0.00000395, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_vision": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/zai-org/glm-4.6v": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 5.5e-8, + "input_cost_per_token_cache_hit": 5.5e-8, + "supports_reasoning": true + }, + "novita/zai-org/glm-4.6": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 0.0000022, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token_cache_hit": 1.1e-7, + "supports_reasoning": true + }, + "novita/kwaipilot/kat-coder-pro": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000012, + "max_input_tokens": 256000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 6e-8, + "input_cost_per_token_cache_hit": 6e-8 + }, + "novita/qwen/qwen3-next-80b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-next-80b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-ocr": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 3e-8, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3.1-terminus": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.000001, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-7, + "input_cost_per_token_cache_hit": 1.35e-7, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-235b-a22b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000015, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-max": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 0.00000211, + "output_cost_per_token": 0.00000845, + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/skywork/r1v4-lite": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.000001, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-7, + "input_cost_per_token_cache_hit": 1.35e-7, + "supports_reasoning": true + }, + "novita/moonshotai/kimi-k2-0905": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000025, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-coder-480b-a35b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000013, + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-coder-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.7e-7, + "max_input_tokens": 160000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/openai/gpt-oss-120b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/moonshotai/kimi-k2-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.7e-7, + "output_cost_per_token": 0.0000023, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3-0324": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.00000112, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-7, + "input_cost_per_token_cache_hit": 1.35e-7 + }, + "novita/zai-org/glm-4.5": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000022, + "max_input_tokens": 131072, + "max_output_tokens": 98304, + "max_tokens": 98304, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token_cache_hit": 1.1e-7, + "supports_reasoning": true + }, + "novita/qwen/qwen3-235b-a22b-thinking-2507": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.000003, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3.1-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_system_messages": true + }, + "novita/google/gemma-3-12b-it": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/zai-org/glm-4.5v": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000018, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token_cache_hit": 1.1e-7, + "supports_reasoning": true + }, + "novita/openai/gpt-oss-20b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-235b-a22b-instruct-2507": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 5.8e-7, + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-r1-distill-qwen-14b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, + "max_input_tokens": 32768, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3.3-70b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.35e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 131072, + "max_output_tokens": 120000, + "max_tokens": 120000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/qwen/qwen-2.5-72b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 4e-7, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/mistralai/mistral-nemo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.7e-7, + "max_input_tokens": 60288, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/minimaxai/minimax-m1-80k": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 0.0000022, + "max_input_tokens": 1000000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-0528": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000025, + "max_input_tokens": 163840, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 3.5e-7, + "input_cost_per_token_cache_hit": 3.5e-7, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-distill-qwen-32b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "max_input_tokens": 64000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-8, + "output_cost_per_token": 4e-8, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_system_messages": true + }, + "novita/microsoft/wizardlm-2-8x22b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6.2e-7, + "output_cost_per_token": 6.2e-7, + "max_input_tokens": 65535, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-r1-0528-qwen3-8b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-8, + "output_cost_per_token": 9e-8, + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-distill-llama-70b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-7, + "output_cost_per_token": 8e-7, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3-70b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.1e-7, + "output_cost_per_token": 7.4e-7, + "max_input_tokens": 8192, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-235b-a22b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 8e-7, + "max_input_tokens": 40960, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-4-maverick-17b-128e-instruct-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 8.5e-7, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/meta-llama/llama-4-scout-17b-16e-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 5.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/nousresearch/hermes-2-pro-llama-3-8b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 1.4e-7, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen2.5-vl-72b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-7, + "output_cost_per_token": 8e-7, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/sao10k/l3-70b-euryale-v2.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 0.00000148, + "output_cost_per_token": 0.00000148, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-21B-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.8e-7, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/sao10k/l3-8b-lunaris": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/baichuan/baichuan-m2-32b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 7e-8, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-424b-a47b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4.2e-7, + "output_cost_per_token": 0.00000125, + "max_input_tokens": 123000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_vision": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/baidu/ernie-4.5-300b-a47b-paddle": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.8e-7, + "output_cost_per_token": 0.0000011, + "max_input_tokens": 123000, + "max_output_tokens": 12000, + "max_tokens": 12000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-prover-v2-671b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000025, + "max_input_tokens": 160000, + "max_output_tokens": 160000, + "max_tokens": 160000, + "supports_system_messages": true + }, + "novita/qwen/qwen3-32b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4.5e-7, + "max_input_tokens": 40960, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-30b-a3b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 4.5e-7, + "max_input_tokens": 40960, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/google/gemma-3-27b-it": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.19e-7, + "output_cost_per_token": 2e-7, + "max_input_tokens": 98304, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-v3-turbo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000013, + "max_input_tokens": 64000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-r1-turbo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000025, + "max_input_tokens": 64000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/Sao10K/L3-8B-Stheno-v3.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 8192, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/gryphe/mythomax-l2-13b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-8, + "output_cost_per_token": 9e-8, + "max_input_tokens": 4096, + "max_output_tokens": 3200, + "max_tokens": 3200, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-28b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.9e-7, + "output_cost_per_token": 3.9e-7, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-8, + "output_cost_per_token": 5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/zai-org/glm-4.5-air": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 8.5e-7, + "max_input_tokens": 131072, + "max_output_tokens": 98304, + "max_tokens": 98304, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 7e-7, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-vl-30b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.000001, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-omni-30b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 9.7e-7, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true, + "supports_audio_input": true + }, + "novita/qwen/qwen3-omni-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 9.7e-7, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_audio_input": true, + "supports_audio_output": true + }, + "novita/qwen/qwen-mt-plus": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 7.5e-7, + "max_input_tokens": 16384, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-28b-a3b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 5.6e-7, + "max_input_tokens": 30000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/baidu/ernie-4.5-21B-a3b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.8e-7, + "max_input_tokens": 120000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/qwen/qwen3-8b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.5e-8, + "output_cost_per_token": 1.38e-7, + "max_input_tokens": 128000, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-4b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 3e-8, + "max_input_tokens": 128000, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen2.5-7b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-8, + "output_cost_per_token": 7e-8, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/meta-llama/llama-3.2-3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-8, + "output_cost_per_token": 5e-8, + "max_input_tokens": 32768, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/sao10k/l31-70b-euryale-v2.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 0.00000148, + "output_cost_per_token": 0.00000148, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "llamagate/llama-3.1-8b": { + "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "input_cost_per_token": 3e-8, + "output_cost_per_token": 5e-8, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/llama-3.2-3b": { + "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 8e-8, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/mistral-7b-v0.3": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/qwen3-8b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.4e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/dolphin3-8b": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/deepseek-r1-8b": { + "max_tokens": 16384, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/deepseek-r1-7b-qwen": { + "max_tokens": 16384, + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/openthinker-7b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/qwen2.5-coder-7b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 1.2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/deepseek-coder-6.7b": { + "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 1.2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/codellama-7b": { + "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 1.2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/qwen3-vl-8b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 5.5e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "llamagate/llava-7b": { + "max_tokens": 2048, + "max_input_tokens": 4096, + "max_output_tokens": 2048, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_response_schema": true, + "supports_vision": true + }, + "llamagate/gemma3-4b": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 3e-8, + "output_cost_per_token": 8e-8, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "sarvam/sarvam-m": { + "cache_creation_input_token_cost": 0, + "cache_creation_input_token_cost_above_1hr": 0, + "cache_read_input_token_cost": 0, + "input_cost_per_token": 0, + "litellm_provider": "sarvam", + "max_input_tokens": 8192, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0, + "supports_reasoning": true + }, + "gpt-5-search-api": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5-search-api-2025-10-14": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-realtime-mini-2025-10-06": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_audio_token_cost": 3e-7, + "cache_read_input_token_cost": 6e-8, + "input_cost_per_audio_token": 0.00001, + "input_cost_per_image": 8e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-mini-2025-12-15": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_audio_token_cost": 3e-7, + "cache_read_input_token_cost": 6e-8, + "input_cost_per_audio_token": 0.00001, + "input_cost_per_image": 8e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gemini/gemini-2.0-flash-lite-001": { + "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "rpm": 4000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 4000000 + }, + "gemini-2.5-flash-native-audio-latest": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true + }, + "gemini-2.5-flash-native-audio-preview-09-2025": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true + }, + "gemini-2.5-flash-native-audio-preview-12-2025": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true + }, + "gemini/gemini-2.5-flash-native-audio-latest": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-2.5-flash-native-audio-preview-09-2025": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-2.5-flash-native-audio-preview-12-2025": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini-flash-latest": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "gemini-flash-lite-latest": { + "cache_read_input_token_cost": 1e-8, + "input_cost_per_audio_token": 3e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + "gemini-pro-latest": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + "gemini/gemini-pro-latest": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + "gemini-exp-1206": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "vertex_ai/claude-sonnet-4-6@default": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + } } } \ No newline at end of file diff --git a/cecli/tools/update_todo_list.py b/cecli/tools/update_todo_list.py index efd3ffa2741..461f5b4bdf2 100644 --- a/cecli/tools/update_todo_list.py +++ b/cecli/tools/update_todo_list.py @@ -204,9 +204,6 @@ def format_output(cls, coder, mcp_server, tool_response): remaining_tasks.append(f"○ {task_item['task']}") # Display formatted todo list - coder.io.tool_output("") - coder.io.tool_output(f"{color_start}Todo List:{color_end}") - if done_tasks: coder.io.tool_output("Done:") for task in done_tasks: diff --git a/cecli/tui/widgets/output.py b/cecli/tui/widgets/output.py index 041c03e8965..f0f286e8d3c 100644 --- a/cecli/tui/widgets/output.py +++ b/cecli/tui/widgets/output.py @@ -4,6 +4,7 @@ import textwrap from rich.markdown import Markdown +from rich.markup import escape from rich.padding import Padding from rich.style import Style as RichStyle from rich.text import Text @@ -90,7 +91,6 @@ async def stream_chunk(self, text: str): # Check for cost updates in the text self._check_cost(text) - # Add text to line buffer self._line_buffer += text @@ -111,7 +111,7 @@ async def stream_chunk(self, text: str): # Output each wrapped line for wrapped in wrapped_line.split("\n"): if wrapped.strip(): - self.output(wrapped, render_markdown=True) + self.output(escape(wrapped), render_markdown=True) async def end_response(self): """End the current LLM response.""" @@ -156,7 +156,8 @@ def add_user_message(self, text: str): for wrapped in wrapped_line.split("\n"): if wrapped.strip(): self.output( - f"[bold medium_spring_green]{wrapped}[/bold medium_spring_green]" + f"[bold medium_spring_green]{escape(wrapped)}[/bold" + " medium_spring_green]" ) self.scroll_end(animate=False) diff --git a/cecli/website/docs/config/agent-mode.md b/cecli/website/docs/config/agent-mode.md index 5d47d9f42dc..f7a5e2e308a 100644 --- a/cecli/website/docs/config/agent-mode.md +++ b/cecli/website/docs/config/agent-mode.md @@ -167,6 +167,7 @@ agent-config: exclude_context_blocks: ["symbol_outline", "directory_structure"] # Optional: Context blocks to exclude # Performance and behavior settings + hot_reload: false # automatically reload skills folders and definitions between turns large_file_token_threshold: 12500 # Token threshold for large file warnings skip_cli_confirmations: false # YOLO mode - be brave and let the LLM cook command_timeout: 30 # Time to wait for commands to finish before automatic backgrounding occurs