diff --git a/cecli/__init__.py b/cecli/__init__.py
index 421a7a40714..5ff9ffdc8a9 100644
--- a/cecli/__init__.py
+++ b/cecli/__init__.py
@@ -1,6 +1,6 @@
from packaging import version
-__version__ = "0.99.1.dev"
+__version__ = "0.99.2.dev"
safe_version = __version__
try:
diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py
index d6edbb84341..342a183eb1a 100644
--- a/cecli/coders/agent_coder.py
+++ b/cecli/coders/agent_coder.py
@@ -95,7 +95,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _setup_agent(self):
- os.makedirs(".cecli/workspace", exist_ok=True)
+ os.makedirs(".cecli/temp", exist_ok=True)
def _get_agent_config(self):
"""
@@ -1020,7 +1020,7 @@ def _generate_tool_context(self, repetitive_tools):
self.model_kwargs = {
"temperature": default_temp + 0.1,
"frequency_penalty": default_fp + 0.2,
- "presence_penalty": 0.1,
+ # "presence_penalty": 0.1,
}
else:
temperature = nested.getter(self.model_kwargs, "temperature", default_temp)
diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index 4139412156a..6e41c80a4e9 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -229,6 +229,7 @@ async def create(
file_watcher=from_coder.file_watcher,
mcp_manager=from_coder.mcp_manager,
uuid=from_coder.uuid,
+ repo=from_coder.repo,
)
use_kwargs.update(update) # override to complete the switch
use_kwargs.update(kwargs) # override passed kwargs
@@ -328,6 +329,7 @@ def __init__(
uuid="",
):
# initialize from args.map_cache_dir
+ self.interrupt_event = asyncio.Event()
self.uuid = generate_unique_id()
if uuid:
self.uuid = uuid
@@ -1735,6 +1737,7 @@ def keyboard_interrupt(self):
self.io.tool_warning("\n\n^C KeyboardInterrupt")
+ self.interrupt_event.set()
self.last_keyboard_interrupt = time.time()
# Old summarization system removed - using context compaction logic instead
@@ -2768,6 +2771,7 @@ async def process_tool_calls(self, tool_call_response):
def _print_tool_call_info(self, server_tool_calls):
"""Print information about an MCP tool call."""
+ self.io.ring_bell()
# self.io.tool_output("Preparing to run MCP tools", bold=False)
for server, tool_calls in server_tool_calls.items():
@@ -3039,6 +3043,7 @@ async def check_for_file_mentions(self, content):
return prompts.added_files.format(fnames=", ".join(added_fnames))
async def send(self, messages, model=None, functions=None, tools=None):
+ self.interrupt_event.clear()
self.got_reasoning_content = False
self.ended_reasoning_content = False
@@ -3058,15 +3063,33 @@ async def send(self, messages, model=None, functions=None, tools=None):
self.token_profiler.start()
try:
- hash_object, completion = await model.send_completion(
- messages,
- functions,
- self.stream,
- self.temperature,
- # This could include any tools, but for now it is just MCP tools
- tools=tools,
- override_kwargs=self.model_kwargs.copy(),
+ completion_task = asyncio.create_task(
+ model.send_completion(
+ messages,
+ functions,
+ self.stream,
+ self.temperature,
+ # This could include any tools, but for now it is just MCP tools
+ tools=tools,
+ override_kwargs=self.model_kwargs.copy(),
+ )
+ )
+ interrupt_task = asyncio.create_task(self.interrupt_event.wait())
+
+ done, pending = await asyncio.wait(
+ {completion_task, interrupt_task},
+ return_when=asyncio.FIRST_COMPLETED,
)
+
+ if interrupt_task in done:
+ completion_task.cancel()
+ try:
+ await completion_task
+ except asyncio.CancelledError:
+ pass
+ raise KeyboardInterrupt
+
+ hash_object, completion = completion_task.result()
self.chat_completion_call_hashes.append(hash_object.hexdigest())
if not isinstance(completion, ModelResponse):
diff --git a/cecli/helpers/monorepo/project.py b/cecli/helpers/monorepo/project.py
index 4c874508928..516208a2296 100644
--- a/cecli/helpers/monorepo/project.py
+++ b/cecli/helpers/monorepo/project.py
@@ -11,6 +11,7 @@ def __init__(self, workspace_path: Path, config: Dict[str, Any]):
self.config = config
self.name = config["name"]
self.repo_url = config["repo"]
+ self.ignore_file = config.get("ignore")
self.base_path = workspace_path / self.name
self.main_path = self.base_path / "main"
diff --git a/cecli/helpers/monorepo/workspace.py b/cecli/helpers/monorepo/workspace.py
index 54fd22f4432..0482ff0d220 100644
--- a/cecli/helpers/monorepo/workspace.py
+++ b/cecli/helpers/monorepo/workspace.py
@@ -24,6 +24,17 @@ def initialize(self) -> None:
project = Project(self.path, proj_cfg)
project.initialize()
+ # Copy ignore files to workspace root
+ for proj_cfg in projects_config:
+ ignore_file = proj_cfg.get("ignore")
+ if ignore_file:
+ ignore_path = Path(ignore_file).expanduser()
+ if ignore_path.exists():
+ import shutil
+
+ dest_path = self.path / f"{proj_cfg['name']}.ignore"
+ shutil.copy2(ignore_path, dest_path)
+
# Write metadata
import json
diff --git a/cecli/helpers/skills.py b/cecli/helpers/skills.py
index 06c7fd24ff1..a239aebf95f 100644
--- a/cecli/helpers/skills.py
+++ b/cecli/helpers/skills.py
@@ -35,6 +35,7 @@ class SkillContent:
references: Dict[str, Path] = field(default_factory=dict)
scripts: Dict[str, Path] = field(default_factory=dict)
assets: Dict[str, Path] = field(default_factory=dict)
+ evals: Dict[str, Path] = field(default_factory=dict)
class SkillsManager:
@@ -227,6 +228,9 @@ def _load_complete_skill(self, metadata: SkillMetadata) -> SkillContent:
# Load assets
assets = self._load_assets(skill_dir)
+ # Load evals
+ evals = self._load_evals(skill_dir)
+
return SkillContent(
metadata=metadata,
frontmatter=frontmatter,
@@ -234,6 +238,7 @@ def _load_complete_skill(self, metadata: SkillMetadata) -> SkillContent:
references=references,
scripts=scripts,
assets=assets,
+ evals=evals,
)
def _load_references(self, skill_dir: Path) -> Dict[str, Path]:
@@ -286,6 +291,23 @@ def _load_assets(self, skill_dir: Path) -> Dict[str, Path]:
return assets
+ def _load_evals(self, skill_dir: Path) -> Dict[str, Path]:
+ """Load eval files from the evals/ directory."""
+ evals = {}
+ evals_dir = skill_dir / "evals"
+
+ if evals_dir.exists():
+ for eval_file in evals_dir.glob("**/*"):
+ if eval_file.is_file():
+ try:
+ # Use relative path as key, store the Path object
+ rel_path = eval_file.relative_to(evals_dir)
+ evals[str(rel_path)] = eval_file
+ except Exception:
+ continue
+
+ return evals
+
def get_skill_summary(self, skill_name: str) -> Optional[str]:
"""
Get a summary of a skill for display purposes.
@@ -315,9 +337,11 @@ def get_skill_summary(self, skill_name: str) -> Optional[str]:
ref_count = len(skill.references)
script_count = len(skill.scripts)
asset_count = len(skill.assets)
+ eval_count = len(skill.evals)
summary += (
- f"Resources: {ref_count} references, {script_count} scripts, {asset_count} assets\n"
+ f"Resources: {ref_count} references, {script_count} scripts, {asset_count} assets,"
+ f" {eval_count} evals\n"
)
return summary
@@ -540,6 +564,14 @@ def get_skills_content(self) -> Optional[str]:
result += f"- **{asset_name}**: `{asset_path}`\n"
result += "\n"
+ # Add evals file paths
+ if skill_content.evals:
+ result += f"#### Evals ({len(skill_content.evals)} file(s))\n\n"
+ result += "Available eval files:\n\n"
+ for eval_name, eval_path in skill_content.evals.items():
+ result += f"- **{eval_name}**: `{eval_path}`\n"
+ result += "\n"
+
result += "---\n\n"
result += ""
diff --git a/cecli/io.py b/cecli/io.py
index 8f572b7e856..4f50b9f6a02 100644
--- a/cecli/io.py
+++ b/cecli/io.py
@@ -758,6 +758,11 @@ def rule(self):
print()
def interrupt_input(self):
+ if self.coder:
+ coder = self.coder()
+ if coder and hasattr(coder, "interrupt_event"):
+ coder.interrupt_event.set()
+
if self.prompt_session and self.prompt_session.app:
# Store any partial input before interrupting
self.placeholder = self.prompt_session.app.current_buffer.text
@@ -1301,6 +1306,7 @@ async def _confirm_ask(
self.user_input(f"{question} - {res}", log_only=False)
else:
# Ring the bell if needed
+ self.notify_user_input_required()
self.ring_bell()
self.start_spinner("Awaiting Confirmation...", False)
@@ -1708,22 +1714,28 @@ def get_default_notification_command(self):
return None # Unknown system
+ def _send_notification(self):
+ if self.notifications_command:
+ try:
+ result = subprocess.run(self.notifications_command, shell=True, capture_output=True)
+ if result.returncode != 0 and result.stderr:
+ error_msg = result.stderr.decode("utf-8", errors="replace")
+ self.tool_warning(f"Failed to run notifications command: {error_msg}")
+ except Exception as e:
+ self.tool_warning(f"Failed to run notifications command: {e}")
+ else:
+ print("\a", end="", flush=True) # Ring the bell
+
+ def notify_user_input_required(self):
+ """Send a notification that user input is required."""
+ if self.notifications:
+ self._send_notification()
+
def ring_bell(self):
"""Ring the terminal bell if needed and clear the flag"""
if self.bell_on_next_input and self.notifications:
- if self.notifications_command:
- try:
- result = subprocess.run(
- self.notifications_command, shell=True, capture_output=True
- )
- if result.returncode != 0 and result.stderr:
- error_msg = result.stderr.decode("utf-8", errors="replace")
- self.tool_warning(f"Failed to run notifications command: {error_msg}")
- except Exception as e:
- self.tool_warning(f"Failed to run notifications command: {e}")
- else:
- print("\a", end="", flush=True) # Ring the bell
- self.bell_on_next_input = False # Clear the flag
+ self._send_notification()
+ self.bell_on_next_input = False
def toggle_multiline_mode(self):
"""Toggle between normal and multiline input modes"""
diff --git a/cecli/main.py b/cecli/main.py
index 3fe629c46c5..76b92cb0870 100644
--- a/cecli/main.py
+++ b/cecli/main.py
@@ -610,7 +610,7 @@ async def main_async(argv=None, input=None, output=None, force_git_root=None, re
if hasattr(args, "hooks") and args.hooks is not None:
args.hooks = convert_yaml_to_json_string(args.hooks)
if hasattr(args, "workspaces") and args.workspaces is not None:
- args.hooks = convert_yaml_to_json_string(args.workspaces)
+ args.workspaces = convert_yaml_to_json_string(args.workspaces)
# Interpolate environment variables in all string arguments
for key, value in vars(args).items():
diff --git a/cecli/prompts/agent.yml b/cecli/prompts/agent.yml
index 1e3d7a7d924..fcadb95228d 100644
--- a/cecli/prompts/agent.yml
+++ b/cecli/prompts/agent.yml
@@ -18,13 +18,15 @@ main_system: |
## Core Directives
**Act Proactively**: Autonomously use discovery and management tools (`ViewFilesAtGlob`, `ViewFilesMatching`, `Ls`, `ContextManager`) to fulfill the request. Chain tool calls across multiple turns for continuous exploration.
- **Be Decisive**: Trust your findings. Do not repeat identical searches or ask redundant questions once a path is established.
+ **Be Decisive**: Trust your findings. Do not repeat identical searches or ask redundant questions.
**Be Efficient**: Batch tool calls when tools allow you to. Respect usage limits while maximizing the utility of each turn.
+ **Be Persistent**: Do not take shortcuts. Work through your task until completion. No task takes too long as long as you are making progress towards the goal.
### 1. FILE FORMAT
Files are provided in "hashline" format. Each line starts with a case-sensitive content hash followed by `::`.
+ Do not attempt to write these content hashes. They are automatically generated, maintained, and subject to change.
**Example File Format :**
il9n::#!/usr/bin/env python3
@@ -43,33 +45,30 @@ main_system: |
5. **Finished**: Use the `Finished` tool only after verifying the solution. Briefly summarize the changes for the user.
## Todo List Management
- - Use `UpdateTodoList` every 3-10 tool calls to keep the state synchronized.
- - Break complex tasks into granular steps so they remain tractable and context-efficient
+ - Use `UpdateTodoList` to keep the state synchronized as you complete subtasks.
+ - Break complex tasks and long edits into granular steps so they remain tractable and context-efficient.
- ### Editing Tools (Precision Protocol)
- Files use leading hashline content id prefixes inside brackets, i.e. `[{{4 char hash}}]{{line content}}`.
- Do not attempt to write these content ids. They are automatically generated.
+ ### Editing Tool Protocol
**MANDATORY Two-Phase Safety Protocol**:
- 1. **Phase 1**: Use `ShowContext` to get the hashline-prefixed content around the pattern to modify. Capture entire functions, logical blocks and closures. You may use multiple calls.
+ 1. **Phase 1**: Use `ShowContext` to gather the hashline-prefixed content of the section to modify. Capture entire functions, logical blocks and closures.
2. **Phase 2**: Execute the edit (`ReplaceText`, `InsertText`, `DeleteText`) using the verified hashlines prefixes from the `ShowContext` tool.
**Atomic Scope:** Include the **entire function or logical block**. Never return partial syntax or broken closures. Do not attempt to replace just the beginning or end of a closure.
- **Indentation**: Preserve all whitespace (spaces, tabs, and newlines).
+ **Indentation**: Preserve all necessary whitespace (spaces, tabs, and newlines) and stylistic indentation.
- Use the `.cecli/workspace` directory for all temporary, test, or scratch files.
+ Use the `.cecli/temp` directory for all temporary, test, or scratch files.
Always reply to the user in {language}.
system_reminder: |
## Reminders
**Strict Scope**: Stay on task. Do not alter functionality and syntax that is out of scope or pursue unrequested refactors. Do not attempt to modify large files in one shot. Work step by step.
- **Context Hygiene**: Remove files or skills from context using `ContextManager` or `RemoveSkill` once they are no longer needed to save tokens and prevent confusion.
+ **Context Hygiene**: Remove files and loaded skills from context using `ContextManager` or `RemoveSkill` once they are no longer needed to save tokens and prevent confusion.
**Turn Management**: Tool calls trigger the next turn. Do not include tool calls in your final summary to the user. You must use `ShowContext` to view the relevant hashline range before each edit.
- **Sandbox**: Use `.cecli/workspace` for all verification and temporary logic.
- **Novelty**: Do not repeat phrases in your responses to the user. You do not need to declare you understand the task. Simply proceed. Only give status when you have new information.
- **Patience**: Do not take short cuts. Work through your task until completion. No task takes too long as long as you are making progress towards the goal.
+ **Sandbox**: Use `.cecli/temp` for all verification and temporary logic.
+ **Novelty**: Do not repeat phrases in your responses to the user. You do not need to declare you understand the task. Simply proceed. Only give status updates when you have new information.
{lazy_prompt}
{shell_cmd_reminder}
diff --git a/cecli/prompts/base.yml b/cecli/prompts/base.yml
index f7962a15c1a..ce4ad217705 100644
--- a/cecli/prompts/base.yml
+++ b/cecli/prompts/base.yml
@@ -98,7 +98,7 @@ compaction_prompt: |
- (e.g., "Discovered that the connection timeout error is triggered by the `RetryPolicy` class.")
- (e.g., "Successfully refactored the `validate_input` function to handle null bytes.")
- (e.g., "Reverted changes to `db.py` after determining the issue was in the environment config instead.")
- - (e.g., "Verified that the fix works in isolation using a temporary script in `.cecli/workspace`.")
+ - (e.g., "Verified that the fix works in isolation using a temporary script in `.cecli/temp`.")
### 3. Current Technical Context
- **Files In-Scope**: List paths currently being edited or actively referenced.
diff --git a/cecli/repo.py b/cecli/repo.py
index efe39efa2f9..8cf61dee83b 100644
--- a/cecli/repo.py
+++ b/cecli/repo.py
@@ -95,6 +95,12 @@ def __init__(
self.subtree_only = subtree_only
self.git_commit_verify = git_commit_verify
self.ignore_file_cache = {}
+ self.is_workspace = False
+ self.workspace_path = None
+ self.workspace_config = {}
+ self.workspace_ignore_specs = {}
+ self.workspace_ignore_ts = {}
+ # Workspace detection and config loading occur later in __init__
if git_dname:
check_fnames = [git_dname]
@@ -129,27 +135,40 @@ def __init__(
raise FileNotFoundError
self._init_repo_path = repo_paths.pop()
- self.init_repo()
-
- if cecli_ignore_file:
- self.cecli_ignore_file = Path(cecli_ignore_file)
# Detect if we're in a workspace
- self.workspace_path = self._detect_workspace_path(self.root)
+ self.workspace_path = self._detect_workspace_path(self._init_repo_path)
if self.workspace_path:
- self.io.tool_output(f"Working in workspace: {self.workspace_path.name}")
+ self.is_workspace = True
+
+ try:
+ from cecli.helpers.monorepo.config import load_workspace_config
+
+ self.workspace_config = load_workspace_config(name=self.workspace_path.name)
+ except Exception:
+ self.workspace_config = {}
+
+ self.refresh_cecli_ignore()
+
+ self.init_repo()
+ if cecli_ignore_file:
+ self.cecli_ignore_file = Path(cecli_ignore_file)
def init_repo(self):
if not self.repo:
self.repo = git.Repo(self._init_repo_path, odbt=git.GitCmdObjectDB)
self.root = utils.safe_abs_path(self.repo.working_tree_dir)
+ if self.is_workspace:
+ self.root = self.workspace_path
+
try:
commit = self.repo.head.commit
return commit
except ANY_GIT_ERROR:
- self.repo = git.Repo(self._init_repo_path, odbt=git.GitCmdObjectDB)
- self.root = utils.safe_abs_path(self.repo.working_tree_dir)
+ if not self.is_workspace:
+ self.repo = git.Repo(self._init_repo_path, odbt=git.GitCmdObjectDB)
+ self.root = utils.safe_abs_path(self.repo.working_tree_dir)
def _detect_workspace_path(self, start_path: str):
"""Check if current directory is within a workspace"""
@@ -612,7 +631,9 @@ def get_workspace_files(self):
).splitlines()
for f in res:
- all_files.append(f"{proj_name}/main/{f}")
+ rel_path = f"{proj_name}/main/{f}"
+ if not self.ignored_file(rel_path):
+ all_files.append(rel_path)
except Exception:
continue
@@ -625,21 +646,39 @@ def normalize_path(self, path):
if res:
return res
- path = str(Path(PurePosixPath((Path(self.root) / path).relative_to(self.root))))
+ if self.is_workspace:
+ try:
+ # In workspace mode, try to make it relative to workspace_path first
+ path = str(
+ Path(
+ PurePosixPath(
+ (Path(self.workspace_path) / path).relative_to(self.workspace_path)
+ )
+ )
+ )
+ except ValueError:
+ # Fallback to standard relative_to(self.root)
+ path = str(Path(PurePosixPath((Path(self.root) / path).relative_to(self.root))))
+ else:
+ path = str(Path(PurePosixPath((Path(self.root) / path).relative_to(self.root))))
+
self.normalized_path[orig_path] = path
return path
def refresh_cecli_ignore(self):
- if not self.cecli_ignore_file:
+ if not self.cecli_ignore_file and not self.is_workspace:
return
current_time = time.time()
if current_time - self.cecli_ignore_last_check < 1:
return
+ if self.is_workspace:
+ self._refresh_workspace_ignores()
+
self.cecli_ignore_last_check = current_time
- if not self.cecli_ignore_file.is_file():
+ if not self.cecli_ignore_file or not self.cecli_ignore_file.is_file():
return
mtime = self.cecli_ignore_file.stat().st_mtime
@@ -652,6 +691,35 @@ def refresh_cecli_ignore(self):
lines,
)
+ def _refresh_workspace_ignores(self):
+ if not hasattr(self, "workspace_config") or not self.workspace_config:
+ return
+
+ if not hasattr(self, "workspace_ignore_specs"):
+ self.workspace_ignore_specs = {}
+ self.workspace_ignore_ts = {}
+
+ projects = self.workspace_config.get("projects", [])
+ for proj in projects:
+ proj_name = proj.get("name")
+ ignore_file = proj.get("ignore")
+ if not proj_name or not ignore_file:
+ continue
+
+ ignore_path = self.workspace_path / f"{proj_name}.ignore"
+ if not ignore_path.is_file():
+ continue
+
+ mtime = ignore_path.stat().st_mtime
+ if mtime != self.workspace_ignore_ts.get(proj_name):
+ self.workspace_ignore_ts[proj_name] = mtime
+ self.ignore_file_cache = {}
+ lines = ignore_path.read_text().splitlines()
+ self.workspace_ignore_specs[proj_name] = pathspec.PathSpec.from_lines(
+ pathspec.patterns.GitWildMatchPattern,
+ lines,
+ )
+
def _get_gitignore_spec(self, dir_path):
"""Get or create a GitIgnoreSpec for a directory, caching for performance."""
dir_path = Path(dir_path).resolve()
@@ -755,6 +823,31 @@ def ignored_file_raw(self, fname):
if cwd_path not in fname_path.parents and fname_path != cwd_path:
return True
+ if self.is_workspace:
+ # Check project-specific ignores
+ try:
+ fname_rel = self.normalize_path(fname)
+ parts = Path(fname_rel).parts
+ if parts:
+ proj_name = parts[0]
+ if (
+ hasattr(self, "workspace_ignore_specs")
+ and proj_name in self.workspace_ignore_specs
+ ):
+ # Check against project-specific spec
+ # The spec expects paths relative to the project root (usually proj/main/)
+ if len(parts) > 2 and parts[1] == "main":
+ proj_rel_path = str(Path(*parts[2:]))
+ else:
+ proj_rel_path = str(Path(*parts[1:]))
+
+ if self.workspace_ignore_specs[proj_name].match_file(proj_rel_path):
+ return True
+ # If not matched by project-specific ignore, continue to global ignore
+ # but don't return False yet as there might be a global .cecli.ignore
+ except (ValueError, IndexError):
+ pass
+
if not self.cecli_ignore_file or not self.cecli_ignore_file.is_file():
return False
diff --git a/cecli/resources/model-metadata.json b/cecli/resources/model-metadata.json
index 7cd826aa1a4..4e6afc79097 100644
--- a/cecli/resources/model-metadata.json
+++ b/cecli/resources/model-metadata.json
@@ -373,7 +373,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"anthropic.claude-haiku-4-5@20251001": {
"cache_creation_input_token_cost": 0.00000125,
@@ -396,7 +397,8 @@
"supports_tool_choice": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
- "supports_native_streaming": true
+ "supports_native_streaming": true,
+ "supports_native_structured_output": true
},
"anthropic.claude-instant-v1": {
"input_cost_per_token": 8e-7,
@@ -484,22 +486,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 159
+ "tool_use_system_prompt_tokens": 159,
+ "supports_native_structured_output": true
},
"anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 0.00000625,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000125,
"cache_read_input_token_cost": 5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.000001,
"input_cost_per_token": 0.000005,
- "input_cost_per_token_above_200k_tokens": 0.00001,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.000025,
- "output_cost_per_token_above_200k_tokens": 0.0000375,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -514,7 +513,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"anthropic.claude-sonnet-4-20250514-v1:0": {
"cache_creation_input_token_cost": 0.00000375,
@@ -574,22 +574,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 159
+ "tool_use_system_prompt_tokens": 159,
+ "supports_native_structured_output": true
},
"anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.00000375,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000075,
"cache_read_input_token_cost": 3e-7,
- "cache_read_input_token_cost_above_200k_tokens": 6e-7,
"input_cost_per_token": 0.000003,
- "input_cost_per_token_above_200k_tokens": 0.000006,
"litellm_provider": "bedrock_converse",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 0.000015,
- "output_cost_per_token_above_200k_tokens": 0.0000225,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -604,7 +601,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"anthropic.claude-v1": {
"input_cost_per_token": 0.000008,
@@ -906,7 +904,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"apac.anthropic.claude-sonnet-4-20250514-v1:0": {
"cache_creation_input_token_cost": 0.00000375,
@@ -957,22 +956,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"au.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 0.000006875,
- "cache_creation_input_token_cost_above_200k_tokens": 0.00001375,
"cache_read_input_token_cost": 5.5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.0000011,
"input_cost_per_token": 0.0000055,
- "input_cost_per_token_above_200k_tokens": 0.000011,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.0000275,
- "output_cost_per_token_above_200k_tokens": 0.00004125,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -987,7 +983,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"au.anthropic.claude-sonnet-4-5-20250929-v1:0": {
"cache_creation_input_token_cost": 0.000004125,
@@ -1017,22 +1014,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"au.anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.000004125,
- "cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
"cache_read_input_token_cost": 3.3e-7,
- "cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
"input_cost_per_token": 0.0000033,
- "input_cost_per_token_above_200k_tokens": 0.0000066,
"litellm_provider": "bedrock_converse",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 0.0000165,
- "output_cost_per_token_above_200k_tokens": 0.00002475,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -1047,7 +1041,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"azure/command-r-plus": {
"input_cost_per_token": 0.000003,
@@ -2529,7 +2524,8 @@
"supports_tool_choice": true,
"supports_service_tier": true,
"supports_vision": true,
- "supports_none_reasoning_effort": true
+ "supports_none_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": true
},
"azure/gpt-5.1-chat": {
"cache_read_input_token_cost": 1.25e-7,
@@ -2858,6 +2854,78 @@
"supports_service_tier": true,
"supports_vision": true
},
+ "azure/gpt-5.4-mini": {
+ "cache_read_input_token_cost": 7.5e-8,
+ "input_cost_per_token": 7.5e-7,
+ "litellm_provider": "azure",
+ "max_input_tokens": 1050000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000045,
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/batch",
+ "/v1/responses"
+ ],
+ "supported_modalities": [
+ "text",
+ "image"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_service_tier": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_none_reasoning_effort": false,
+ "supports_xhigh_reasoning_effort": false
+ },
+ "azure/gpt-5.4-nano": {
+ "cache_read_input_token_cost": 2e-8,
+ "input_cost_per_token": 2e-7,
+ "litellm_provider": "azure",
+ "max_input_tokens": 1050000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00000125,
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/batch",
+ "/v1/responses"
+ ],
+ "supported_modalities": [
+ "text",
+ "image"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_service_tier": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_none_reasoning_effort": false,
+ "supports_xhigh_reasoning_effort": false
+ },
"azure/gpt-audio-1.5-2026-02-23": {
"input_cost_per_audio_token": 0.00004,
"input_cost_per_token": 0.0000025,
@@ -4277,7 +4345,7 @@
"cache_read_input_token_cost": 3e-7,
"input_cost_per_token": 0.000003,
"litellm_provider": "azure_ai",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
@@ -4336,6 +4404,7 @@
"max_tokens": 163840,
"mode": "chat",
"output_cost_per_token": 0.00000168,
+ "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/introducing-deepseek-v3-2-and-deepseek-v3-2-speciale-in-microsoft-foundry/4477549",
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_prompt_caching": true,
@@ -4350,6 +4419,7 @@
"max_tokens": 163840,
"mode": "chat",
"output_cost_per_token": 0.00000168,
+ "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/introducing-deepseek-v3-2-and-deepseek-v3-2-speciale-in-microsoft-foundry/4477549",
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_prompt_caching": true,
@@ -4660,6 +4730,72 @@
"source": "https://azure.microsoft.com/en-us/pricing/details/ai-services/",
"comment": "Flat cost of $0.14 per M input tokens for Azure AI Foundry Model Router infrastructure. Use pattern: azure_ai/model_router/ where deployment-name is your Azure deployment (e.g., azure-model-router)"
},
+ "baseten/MiniMaxAI/MiniMax-M2.5": {
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 0.0000012
+ },
+ "baseten/deepseek-ai/DeepSeek-V3-0324": {
+ "input_cost_per_token": 7.7e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 7.7e-7
+ },
+ "baseten/deepseek-ai/DeepSeek-V3.1": {
+ "input_cost_per_token": 5e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 0.0000015
+ },
+ "baseten/moonshotai/Kimi-K2-Instruct-0905": {
+ "input_cost_per_token": 6e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 0.0000025
+ },
+ "baseten/moonshotai/Kimi-K2-Thinking": {
+ "input_cost_per_token": 6e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 0.0000025
+ },
+ "baseten/moonshotai/Kimi-K2.5": {
+ "input_cost_per_token": 6e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 0.000003
+ },
+ "baseten/nvidia/Nemotron-120B-A12B": {
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 7.5e-7
+ },
+ "baseten/openai/gpt-oss-120b": {
+ "input_cost_per_token": 1e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 5e-7
+ },
+ "baseten/zai-org/GLM-4.6": {
+ "input_cost_per_token": 6e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 0.0000022
+ },
+ "baseten/zai-org/GLM-4.7": {
+ "input_cost_per_token": 6e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 0.0000022
+ },
+ "baseten/zai-org/GLM-5": {
+ "input_cost_per_token": 9.5e-7,
+ "litellm_provider": "baseten",
+ "mode": "chat",
+ "output_cost_per_token": 0.00000315
+ },
"bedrock/*/1-month-commitment/cohere.command-light-text-v14": {
"input_cost_per_second": 0.001902,
"litellm_provider": "bedrock",
@@ -4814,6 +4950,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/ap-northeast-1/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.6e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000144
+ },
"bedrock/ap-northeast-1/moonshotai.kimi-k2-thinking": {
"input_cost_per_token": 7.3e-7,
"litellm_provider": "bedrock",
@@ -4896,6 +5046,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/ap-south-1/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.6e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000144
+ },
"bedrock/ap-south-1/moonshotai.kimi-k2-thinking": {
"input_cost_per_token": 7.1e-7,
"litellm_provider": "bedrock",
@@ -4934,6 +5098,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/ap-southeast-2/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.09e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.000001236
+ },
"bedrock/ap-southeast-3/deepseek.v3.2": {
"input_cost_per_token": 7.4e-7,
"litellm_provider": "bedrock",
@@ -4960,6 +5138,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/ap-southeast-3/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.6e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000144
+ },
"bedrock/ap-southeast-3/moonshotai.kimi-k2.5": {
"input_cost_per_token": 7.2e-7,
"litellm_provider": "bedrock",
@@ -5105,6 +5297,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/eu-central-1/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.6e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000144
+ },
"bedrock/eu-central-1/qwen.qwen3-coder-next": {
"input_cost_per_token": 6e-7,
"litellm_provider": "bedrock",
@@ -5144,6 +5350,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/eu-north-1/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.6e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000144
+ },
"bedrock/eu-north-1/moonshotai.kimi-k2.5": {
"input_cost_per_token": 7.2e-7,
"litellm_provider": "bedrock",
@@ -5171,6 +5391,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/eu-south-1/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.6e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000144
+ },
"bedrock/eu-south-1/qwen.qwen3-coder-next": {
"input_cost_per_token": 6e-7,
"litellm_provider": "bedrock",
@@ -5215,6 +5449,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/eu-west-1/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.6e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000144
+ },
"bedrock/eu-west-1/qwen.qwen3-coder-next": {
"input_cost_per_token": 6e-7,
"litellm_provider": "bedrock",
@@ -5259,6 +5507,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/eu-west-2/minimax.minimax-m2.5": {
+ "input_cost_per_token": 4.7e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000186
+ },
"bedrock/eu-west-2/qwen.qwen3-coder-next": {
"input_cost_per_token": 7.8e-7,
"litellm_provider": "bedrock",
@@ -5391,13 +5653,27 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
- "bedrock/sa-east-1/moonshotai.kimi-k2-thinking": {
- "input_cost_per_token": 7.3e-7,
+ "bedrock/sa-east-1/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3.6e-7,
"litellm_provider": "bedrock",
- "max_input_tokens": 262144,
- "max_output_tokens": 262144,
- "max_tokens": 262144,
- "mode": "chat",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.00000144
+ },
+ "bedrock/sa-east-1/moonshotai.kimi-k2-thinking": {
+ "input_cost_per_token": 7.3e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
"output_cost_per_token": 0.00000303,
"supports_function_calling": true,
"supports_reasoning": true
@@ -5561,6 +5837,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/us-east-1/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.0000012
+ },
"bedrock/us-east-1/mistral.mistral-7b-instruct-v0:2": {
"input_cost_per_token": 1.5e-7,
"litellm_provider": "bedrock",
@@ -5655,6 +5945,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/us-east-2/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.0000012
+ },
"bedrock/us-east-2/moonshotai.kimi-k2-thinking": {
"input_cost_per_token": 6e-7,
"litellm_provider": "bedrock",
@@ -5766,12 +6070,37 @@
"cache_read_input_token_cost": 3e-8,
"cache_creation_input_token_cost": 3.75e-7
},
- "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": {
+ "bedrock/us-gov-east-1/anthropic.claude-haiku-4-5-20251001-v1:0": {
+ "cache_creation_input_token_cost": 0.0000015,
+ "cache_read_input_token_cost": 1.2e-7,
+ "input_cost_per_token": 0.0000012,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000006,
+ "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock",
+ "supports_assistant_prefill": true,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true,
+ "supports_pdf_input": true
+ },
+ "bedrock/us-gov-east-1/anthropic.claude-sonnet-4-5-20250929-v1:0": {
+ "cache_creation_input_token_cost": 0.000004125,
+ "cache_read_input_token_cost": 3.3e-7,
"input_cost_per_token": 0.0000033,
"litellm_provider": "bedrock",
"max_input_tokens": 200000,
- "max_output_tokens": 4096,
- "max_tokens": 4096,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
"mode": "chat",
"output_cost_per_token": 0.0000165,
"supports_assistant_prefill": true,
@@ -5783,8 +6112,28 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
+ "supports_native_structured_output": true
+ },
+ "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": {
+ "cache_creation_input_token_cost": 0.000004125,
"cache_read_input_token_cost": 3.3e-7,
- "cache_creation_input_token_cost": 0.000004125
+ "input_cost_per_token": 0.0000033,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000165,
+ "supports_assistant_prefill": true,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_native_structured_output": true
},
"bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": {
"input_cost_per_token": 0.00000265,
@@ -5899,12 +6248,37 @@
"cache_read_input_token_cost": 3e-8,
"cache_creation_input_token_cost": 3.75e-7
},
- "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": {
+ "bedrock/us-gov-west-1/anthropic.claude-haiku-4-5-20251001-v1:0": {
+ "cache_creation_input_token_cost": 0.0000015,
+ "cache_read_input_token_cost": 1.2e-7,
+ "input_cost_per_token": 0.0000012,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000006,
+ "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock",
+ "supports_assistant_prefill": true,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true,
+ "supports_pdf_input": true
+ },
+ "bedrock/us-gov-west-1/anthropic.claude-sonnet-4-5-20250929-v1:0": {
+ "cache_creation_input_token_cost": 0.000004125,
+ "cache_read_input_token_cost": 3.3e-7,
"input_cost_per_token": 0.0000033,
"litellm_provider": "bedrock",
"max_input_tokens": 200000,
- "max_output_tokens": 4096,
- "max_tokens": 4096,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
"mode": "chat",
"output_cost_per_token": 0.0000165,
"supports_assistant_prefill": true,
@@ -5916,8 +6290,28 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
+ "supports_native_structured_output": true
+ },
+ "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": {
+ "cache_creation_input_token_cost": 0.000004125,
"cache_read_input_token_cost": 3.3e-7,
- "cache_creation_input_token_cost": 0.000004125
+ "input_cost_per_token": 0.0000033,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000165,
+ "supports_assistant_prefill": true,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_native_structured_output": true
},
"bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": {
"input_cost_per_token": 0.00000265,
@@ -6071,6 +6465,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "bedrock/us-west-2/minimax.minimax-m2.5": {
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "bedrock",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "output_cost_per_token": 0.0000012
+ },
"bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": {
"input_cost_per_token": 1.5e-7,
"litellm_provider": "bedrock",
@@ -6689,19 +7097,15 @@
},
"claude-opus-4-6": {
"cache_creation_input_token_cost": 0.00000625,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000125,
"cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.000001,
"input_cost_per_token": 0.000005,
- "input_cost_per_token_above_200k_tokens": 0.00001,
"litellm_provider": "anthropic",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.000025,
- "output_cost_per_token_above_200k_tokens": 0.0000375,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -6724,19 +7128,15 @@
},
"claude-opus-4-6-20260205": {
"cache_creation_input_token_cost": 0.00000625,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000125,
"cache_creation_input_token_cost_above_1hr": 0.00001,
"cache_read_input_token_cost": 5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.000001,
"input_cost_per_token": 0.000005,
- "input_cost_per_token_above_200k_tokens": 0.00001,
"litellm_provider": "anthropic",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.000025,
- "output_cost_per_token_above_200k_tokens": 0.0000375,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -6877,18 +7277,14 @@
},
"claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.00000375,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000075,
"cache_read_input_token_cost": 3e-7,
- "cache_read_input_token_cost_above_200k_tokens": 6e-7,
"input_cost_per_token": 0.000003,
- "input_cost_per_token_above_200k_tokens": 0.000006,
"litellm_provider": "anthropic",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 0.000015,
- "output_cost_per_token_above_200k_tokens": 0.0000225,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -9134,7 +9530,8 @@
"output_cost_per_token": 0.00000168,
"supports_function_calling": true,
"supports_reasoning": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_native_structured_output": true
},
"deepseek.v3.2": {
"input_cost_per_token": 6.2e-7,
@@ -9474,7 +9871,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"eu.anthropic.claude-opus-4-1-20250805-v1:0": {
"cache_creation_input_token_cost": 0.00001875,
@@ -9552,22 +9950,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 159
+ "tool_use_system_prompt_tokens": 159,
+ "supports_native_structured_output": true
},
"eu.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 0.000006875,
- "cache_creation_input_token_cost_above_200k_tokens": 0.00001375,
"cache_read_input_token_cost": 5.5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.0000011,
"input_cost_per_token": 0.0000055,
- "input_cost_per_token_above_200k_tokens": 0.000011,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.0000275,
- "output_cost_per_token_above_200k_tokens": 0.00004125,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -9582,7 +9977,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"eu.anthropic.claude-sonnet-4-20250514-v1:0": {
"cache_creation_input_token_cost": 0.00000375,
@@ -9642,22 +10038,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"eu.anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.000004125,
- "cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
"cache_read_input_token_cost": 3.3e-7,
- "cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
"input_cost_per_token": 0.0000033,
- "input_cost_per_token_above_200k_tokens": 0.0000066,
"litellm_provider": "bedrock_converse",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 0.0000165,
- "output_cost_per_token_above_200k_tokens": 0.00002475,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -9672,7 +10065,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"eu.deepseek.v3.2": {
"input_cost_per_token": 7.4e-7,
@@ -12526,7 +12920,8 @@
"supports_tool_choice": true,
"supports_url_context": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "supports_service_tier": true
},
"gemini-2.5-flash-image": {
"cache_read_input_token_cost": 3e-8,
@@ -12575,7 +12970,8 @@
"supports_url_context": true,
"supports_vision": true,
"supports_web_search": false,
- "tpm": 8000000
+ "tpm": 8000000,
+ "supports_service_tier": true
},
"gemini-2.5-flash-lite": {
"cache_read_input_token_cost": 1e-8,
@@ -12620,7 +13016,8 @@
"supports_tool_choice": true,
"supports_url_context": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "supports_service_tier": true
},
"gemini-2.5-flash-lite-preview-06-17": {
"deprecation_date": "2025-11-18",
@@ -12873,7 +13270,8 @@
"supports_tool_choice": true,
"supports_video_input": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "supports_service_tier": true
},
"gemini-2.5-pro-preview-tts": {
"cache_read_input_token_cost": 1.25e-7,
@@ -12994,7 +13392,8 @@
"supports_response_schema": true,
"supports_system_messages": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "supports_service_tier": true
},
"gemini-3-pro-preview": {
"deprecation_date": "2026-03-26",
@@ -13134,7 +13533,40 @@
"supports_video_input": true,
"supports_vision": true,
"supports_web_search": true,
- "supports_native_streaming": true
+ "supports_native_streaming": true,
+ "supports_service_tier": true
+ },
+ "gemini-3.1-flash-live-preview": {
+ "input_cost_per_audio_token": 0.000003,
+ "input_cost_per_image_token": 0.000001,
+ "input_cost_per_token": 7.5e-7,
+ "input_cost_per_video_per_second": 0.000033333333333333335,
+ "litellm_provider": "gemini",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536,
+ "mode": "chat",
+ "output_cost_per_audio_token": 0.000012,
+ "output_cost_per_token": 0.0000045,
+ "source": "https://ai.google.dev/gemini-api/docs/pricing",
+ "supported_endpoints": [
+ "/v1/realtime"
+ ],
+ "supported_modalities": [
+ "text",
+ "image",
+ "audio",
+ "video"
+ ],
+ "supported_output_modalities": [
+ "text",
+ "audio"
+ ],
+ "supports_audio_input": true,
+ "supports_audio_output": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_web_search": true
},
"gemini-3.1-pro-preview": {
"cache_read_input_token_cost": 2e-7,
@@ -13734,7 +14166,8 @@
"supports_url_context": true,
"supports_vision": true,
"supports_web_search": true,
- "tpm": 8000000
+ "tpm": 8000000,
+ "supports_service_tier": true
},
"gemini/gemini-2.5-flash-image": {
"cache_read_input_token_cost": 3e-8,
@@ -13784,7 +14217,8 @@
"supports_url_context": true,
"supports_vision": true,
"supports_web_search": true,
- "tpm": 8000000
+ "tpm": 8000000,
+ "supports_service_tier": true
},
"gemini/gemini-2.5-flash-lite": {
"cache_read_input_token_cost": 1e-8,
@@ -13831,7 +14265,8 @@
"supports_url_context": true,
"supports_vision": true,
"supports_web_search": true,
- "tpm": 250000
+ "tpm": 250000,
+ "supports_service_tier": true
},
"gemini/gemini-2.5-flash-lite-preview-06-17": {
"deprecation_date": "2025-11-18",
@@ -14228,7 +14663,8 @@
"supports_response_schema": true,
"supports_system_messages": true,
"supports_vision": true,
- "supports_web_search": true
+ "supports_web_search": true,
+ "supports_service_tier": true
},
"gemini/gemini-3-pro-preview": {
"deprecation_date": "2026-03-09",
@@ -14374,7 +14810,42 @@
"supports_vision": true,
"supports_web_search": true,
"supports_native_streaming": true,
- "tpm": 250000
+ "tpm": 250000,
+ "supports_service_tier": true
+ },
+ "gemini/gemini-3.1-flash-live-preview": {
+ "input_cost_per_audio_token": 0.000003,
+ "input_cost_per_image_token": 0.000001,
+ "input_cost_per_token": 7.5e-7,
+ "input_cost_per_video_per_second": 0.000033333333333333335,
+ "litellm_provider": "gemini",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536,
+ "mode": "chat",
+ "output_cost_per_audio_token": 0.000012,
+ "output_cost_per_token": 0.0000045,
+ "source": "https://ai.google.dev/gemini-api/docs/pricing",
+ "supported_endpoints": [
+ "/v1/realtime"
+ ],
+ "supported_modalities": [
+ "text",
+ "image",
+ "audio",
+ "video"
+ ],
+ "supported_output_modalities": [
+ "text",
+ "audio"
+ ],
+ "supports_audio_input": true,
+ "supports_audio_output": true,
+ "supports_function_calling": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "tpm": 250000,
+ "rpm": 10
},
"gemini/gemini-3.1-pro-preview": {
"cache_read_input_token_cost": 2e-7,
@@ -14813,6 +15284,55 @@
"supports_tool_choice": true,
"supports_vision": true
},
+ "gemini/lyria-3-clip-preview": {
+ "input_cost_per_token": 0,
+ "litellm_provider": "gemini",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_image": 0.04,
+ "output_cost_per_token": 0,
+ "source": "https://ai.google.dev/gemini-api/docs/pricing",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "audio"
+ ],
+ "supports_audio_input": false,
+ "supports_audio_output": true,
+ "supports_function_calling": false,
+ "supports_prompt_caching": false,
+ "supports_response_schema": false,
+ "supports_system_messages": false,
+ "supports_vision": false,
+ "supports_web_search": false
+ },
+ "gemini/lyria-3-pro-preview": {
+ "input_cost_per_token": 0,
+ "litellm_provider": "gemini",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 0,
+ "source": "https://ai.google.dev/gemini-api/docs/pricing",
+ "supported_modalities": [
+ "text"
+ ],
+ "supported_output_modalities": [
+ "audio"
+ ],
+ "supports_audio_input": false,
+ "supports_audio_output": true,
+ "supports_function_calling": false,
+ "supports_prompt_caching": false,
+ "supports_response_schema": false,
+ "supports_system_messages": false,
+ "supports_vision": false,
+ "supports_web_search": false
+ },
"gigachat/GigaChat-2-Lite": {
"input_cost_per_token": 0,
"litellm_provider": "gigachat",
@@ -15171,7 +15691,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"global.anthropic.claude-opus-4-5-20251101-v1:0": {
"cache_creation_input_token_cost": 0.00000625,
@@ -15197,22 +15718,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 159
+ "tool_use_system_prompt_tokens": 159,
+ "supports_native_structured_output": true
},
"global.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 0.00000625,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000125,
"cache_read_input_token_cost": 5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.000001,
"input_cost_per_token": 0.000005,
- "input_cost_per_token_above_200k_tokens": 0.00001,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.000025,
- "output_cost_per_token_above_200k_tokens": 0.0000375,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -15227,7 +15745,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"global.anthropic.claude-sonnet-4-20250514-v1:0": {
"cache_creation_input_token_cost": 0.00000375,
@@ -15287,22 +15806,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"global.anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.00000375,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000075,
"cache_read_input_token_cost": 3e-7,
- "cache_read_input_token_cost_above_200k_tokens": 6e-7,
"input_cost_per_token": 0.000003,
- "input_cost_per_token_above_200k_tokens": 0.000006,
"litellm_provider": "bedrock_converse",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 0.000015,
- "output_cost_per_token_above_200k_tokens": 0.0000225,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -15317,7 +15833,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"gmi/MiniMaxAI/MiniMax-M2.1": {
"input_cost_per_token": 3e-7,
@@ -15609,6 +16126,18 @@
"supports_system_messages": true,
"supports_tool_choice": true
},
+ "gpt-4-0314": {
+ "deprecation_date": "2026-03-26",
+ "input_cost_per_token": 0.00003,
+ "litellm_provider": "openai",
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "max_tokens": 4096,
+ "mode": "chat",
+ "output_cost_per_token": 0.00006,
+ "supports_system_messages": true,
+ "supports_tool_choice": true
+ },
"gpt-4-0613": {
"deprecation_date": "2025-06-06",
"input_cost_per_token": 0.00003,
@@ -16347,7 +16876,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-2025-08-07": {
"cache_read_input_token_cost": 1.25e-7,
@@ -16389,7 +16919,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-chat": {
"cache_read_input_token_cost": 1.25e-7,
@@ -16423,7 +16954,8 @@
"supports_tool_choice": false,
"supports_vision": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-chat-latest": {
"cache_read_input_token_cost": 1.25e-7,
@@ -16457,7 +16989,8 @@
"supports_tool_choice": false,
"supports_vision": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-mini": {
"cache_read_input_token_cost": 2.5e-8,
@@ -16499,7 +17032,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-mini-2025-08-07": {
"cache_read_input_token_cost": 2.5e-8,
@@ -16541,7 +17075,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-nano": {
"cache_read_input_token_cost": 5e-9,
@@ -16580,7 +17115,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-nano-2025-08-07": {
"cache_read_input_token_cost": 5e-9,
@@ -16618,7 +17154,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-search-api": {
"cache_read_input_token_cost": 1.25e-7,
@@ -16639,7 +17176,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5-search-api-2025-10-14": {
"cache_read_input_token_cost": 1.25e-7,
@@ -16699,7 +17237,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5.1-2025-11-13": {
"cache_read_input_token_cost": 1.25e-7,
@@ -16738,7 +17277,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5.1-chat-latest": {
"cache_read_input_token_cost": 1.25e-7,
@@ -16776,7 +17316,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5.2": {
"cache_read_input_token_cost": 1.75e-7,
@@ -16816,7 +17357,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
- "supports_xhigh_reasoning_effort": true
+ "supports_xhigh_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5.2-2025-12-11": {
"cache_read_input_token_cost": 1.75e-7,
@@ -16856,7 +17398,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": true,
- "supports_xhigh_reasoning_effort": true
+ "supports_xhigh_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5.2-chat-latest": {
"cache_read_input_token_cost": 1.75e-7,
@@ -16893,7 +17436,8 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5.3-chat-latest": {
"cache_read_input_token_cost": 1.75e-7,
@@ -16930,20 +17474,19 @@
"supports_vision": true,
"supports_web_search": true,
"supports_none_reasoning_effort": false,
- "supports_xhigh_reasoning_effort": false
+ "supports_xhigh_reasoning_effort": false,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5.4": {
"cache_read_input_token_cost": 2.5e-7,
"cache_read_input_token_cost_above_272k_tokens": 5e-7,
"cache_read_input_token_cost_flex": 1.3e-7,
"cache_read_input_token_cost_priority": 5e-7,
- "cache_read_input_token_cost_above_272k_tokens_priority": 0.000001,
"input_cost_per_token": 0.0000025,
"input_cost_per_token_above_272k_tokens": 0.000005,
"input_cost_per_token_flex": 0.00000125,
"input_cost_per_token_batches": 0.00000125,
"input_cost_per_token_priority": 0.000005,
- "input_cost_per_token_above_272k_tokens_priority": 0.00001,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
@@ -16953,8 +17496,7 @@
"output_cost_per_token_above_272k_tokens": 0.0000225,
"output_cost_per_token_flex": 0.0000075,
"output_cost_per_token_batches": 0.0000075,
- "output_cost_per_token_priority": 0.0000225,
- "output_cost_per_token_above_272k_tokens_priority": 0.00003375,
+ "output_cost_per_token_priority": 0.00003,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@@ -16979,20 +17521,19 @@
"supports_service_tier": true,
"supports_vision": true,
"supports_none_reasoning_effort": true,
- "supports_xhigh_reasoning_effort": true
+ "supports_xhigh_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": true
},
"gpt-5.4-2026-03-05": {
"cache_read_input_token_cost": 2.5e-7,
"cache_read_input_token_cost_above_272k_tokens": 5e-7,
"cache_read_input_token_cost_flex": 1.3e-7,
"cache_read_input_token_cost_priority": 5e-7,
- "cache_read_input_token_cost_above_272k_tokens_priority": 0.000001,
"input_cost_per_token": 0.0000025,
"input_cost_per_token_above_272k_tokens": 0.000005,
"input_cost_per_token_flex": 0.00000125,
"input_cost_per_token_batches": 0.00000125,
"input_cost_per_token_priority": 0.000005,
- "input_cost_per_token_above_272k_tokens_priority": 0.00001,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
@@ -17002,8 +17543,7 @@
"output_cost_per_token_above_272k_tokens": 0.0000225,
"output_cost_per_token_flex": 0.0000075,
"output_cost_per_token_batches": 0.0000075,
- "output_cost_per_token_priority": 0.0000225,
- "output_cost_per_token_above_272k_tokens_priority": 0.00003375,
+ "output_cost_per_token_priority": 0.00003,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@@ -17028,6 +17568,95 @@
"supports_service_tier": true,
"supports_vision": true
},
+ "gpt-5.4-mini": {
+ "cache_read_input_token_cost": 7.5e-8,
+ "cache_read_input_token_cost_flex": 3.75e-8,
+ "cache_read_input_token_cost_batches": 3.75e-8,
+ "cache_read_input_token_cost_priority": 1.5e-7,
+ "input_cost_per_token": 7.5e-7,
+ "input_cost_per_token_flex": 3.75e-7,
+ "input_cost_per_token_batches": 3.75e-7,
+ "input_cost_per_token_priority": 0.0000015,
+ "litellm_provider": "openai",
+ "max_input_tokens": 272000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000045,
+ "output_cost_per_token_flex": 0.00000225,
+ "output_cost_per_token_batches": 0.00000225,
+ "output_cost_per_token_priority": 0.000009,
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/batch",
+ "/v1/responses"
+ ],
+ "supported_modalities": [
+ "text",
+ "image"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_service_tier": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_none_reasoning_effort": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": false
+ },
+ "gpt-5.4-nano": {
+ "cache_read_input_token_cost": 2e-8,
+ "cache_read_input_token_cost_flex": 1e-8,
+ "cache_read_input_token_cost_batches": 1e-8,
+ "input_cost_per_token": 2e-7,
+ "input_cost_per_token_flex": 1e-7,
+ "input_cost_per_token_batches": 1e-7,
+ "litellm_provider": "openai",
+ "max_input_tokens": 272000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00000125,
+ "output_cost_per_token_flex": 6.25e-7,
+ "output_cost_per_token_batches": 6.25e-7,
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/batch",
+ "/v1/responses"
+ ],
+ "supported_modalities": [
+ "text",
+ "image"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_service_tier": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_none_reasoning_effort": true,
+ "supports_xhigh_reasoning_effort": true,
+ "supports_minimal_reasoning_effort": false
+ },
"gpt-audio": {
"input_cost_per_audio_token": 0.000032,
"input_cost_per_token": 0.0000025,
@@ -18104,7 +18733,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"jp.anthropic.claude-sonnet-4-5-20250929-v1:0": {
"cache_creation_input_token_cost": 0.000004125,
@@ -18134,7 +18764,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"kimi-k2-thinking-251104": {
"input_cost_per_token": 0,
@@ -18919,7 +19550,8 @@
"max_tokens": 8192,
"mode": "chat",
"output_cost_per_token": 0.0000012,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"minimax.minimax-m2.1": {
"input_cost_per_token": 3e-7,
@@ -18934,6 +19566,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "minimax.minimax-m2.5": {
+ "input_cost_per_token": 3e-7,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 1000000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000012,
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "source": "https://aws.amazon.com/bedrock/pricing/"
+ },
"minimax/MiniMax-M2": {
"input_cost_per_token": 3e-7,
"output_cost_per_token": 0.0000012,
@@ -19043,7 +19689,8 @@
"mode": "chat",
"output_cost_per_token": 2e-7,
"supports_function_calling": true,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"mistral.ministral-3-3b-instruct": {
"input_cost_per_token": 1e-7,
@@ -19054,7 +19701,8 @@
"mode": "chat",
"output_cost_per_token": 1e-7,
"supports_function_calling": true,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"mistral.ministral-3-8b-instruct": {
"input_cost_per_token": 1.5e-7,
@@ -19065,7 +19713,8 @@
"mode": "chat",
"output_cost_per_token": 1.5e-7,
"supports_function_calling": true,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"mistral.mistral-7b-instruct-v0:2": {
"input_cost_per_token": 1.5e-7,
@@ -19107,7 +19756,8 @@
"mode": "chat",
"output_cost_per_token": 0.0000015,
"supports_function_calling": true,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"mistral.mistral-small-2402-v1:0": {
"input_cost_per_token": 0.000001,
@@ -19138,7 +19788,8 @@
"mode": "chat",
"output_cost_per_token": 4e-8,
"supports_audio_input": true,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"mistral.voxtral-small-24b-2507": {
"input_cost_per_token": 1e-7,
@@ -19149,7 +19800,8 @@
"mode": "chat",
"output_cost_per_token": 3e-7,
"supports_audio_input": true,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"mistral/codestral-2405": {
"input_cost_per_token": 0.000001,
@@ -19795,7 +20447,8 @@
"mode": "chat",
"output_cost_per_token": 0.0000025,
"supports_reasoning": true,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"moonshot/kimi-k2-0711-preview": {
"cache_read_input_token_cost": 1.5e-7,
@@ -21650,7 +22303,8 @@
"supports_function_calling": true,
"supports_system_messages": true,
"supports_tool_choice": true,
- "source": "https://aws.amazon.com/bedrock/pricing/"
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_native_structured_output": true
},
"nvidia.nemotron-nano-9b-v2": {
"input_cost_per_token": 6e-8,
@@ -21662,6 +22316,20 @@
"output_cost_per_token": 2.3e-7,
"supports_system_messages": true
},
+ "nvidia.nemotron-super-3-120b": {
+ "input_cost_per_token": 1.5e-7,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
+ "mode": "chat",
+ "output_cost_per_token": 6.5e-7,
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true
+ },
"o1": {
"cache_read_input_token_cost": 0.0000075,
"input_cost_per_token": 0.000015,
@@ -22000,19 +22668,31 @@
"supports_function_calling": true,
"supports_response_schema": false
},
- "oci/cohere.command-latest": {
+ "oci/cohere.command-a-reasoning-08-2025": {
"input_cost_per_token": 0.00000156,
"litellm_provider": "oci",
- "max_input_tokens": 128000,
+ "max_input_tokens": 256000,
"max_output_tokens": 4000,
"max_tokens": 4000,
"mode": "chat",
"output_cost_per_token": 0.00000156,
- "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
"supports_function_calling": true,
"supports_response_schema": false
},
- "oci/cohere.command-plus-latest": {
+ "oci/cohere.command-a-translate-08-2025": {
+ "input_cost_per_token": 9e-8,
+ "litellm_provider": "oci",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 4000,
+ "max_tokens": 4000,
+ "mode": "chat",
+ "output_cost_per_token": 9e-8,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": false,
+ "supports_response_schema": false
+ },
+ "oci/cohere.command-a-vision-07-2025": {
"input_cost_per_token": 0.00000156,
"litellm_provider": "oci",
"max_input_tokens": 128000,
@@ -22020,48 +22700,174 @@
"max_tokens": 4000,
"mode": "chat",
"output_cost_per_token": 0.00000156,
- "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
"supports_function_calling": true,
- "supports_response_schema": false
+ "supports_response_schema": false,
+ "supports_vision": true
},
- "oci/meta.llama-3.1-405b-instruct": {
- "input_cost_per_token": 0.00001068,
+ "oci/cohere.command-latest": {
+ "input_cost_per_token": 0.00000156,
"litellm_provider": "oci",
"max_input_tokens": 128000,
"max_output_tokens": 4000,
"max_tokens": 4000,
"mode": "chat",
- "output_cost_per_token": 0.00001068,
- "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "output_cost_per_token": 0.00000156,
+ "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
"supports_function_calling": true,
"supports_response_schema": false
},
- "oci/meta.llama-3.2-90b-vision-instruct": {
- "input_cost_per_token": 0.000002,
+ "oci/cohere.command-plus-latest": {
+ "input_cost_per_token": 0.00000156,
"litellm_provider": "oci",
"max_input_tokens": 128000,
"max_output_tokens": 4000,
"max_tokens": 4000,
"mode": "chat",
- "output_cost_per_token": 0.000002,
- "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "output_cost_per_token": 0.00000156,
+ "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
"supports_function_calling": true,
"supports_response_schema": false
},
- "oci/meta.llama-3.3-70b-instruct": {
- "input_cost_per_token": 7.2e-7,
+ "oci/cohere.command-r-08-2024": {
+ "input_cost_per_token": 1.5e-7,
"litellm_provider": "oci",
"max_input_tokens": 128000,
"max_output_tokens": 4000,
"max_tokens": 4000,
"mode": "chat",
- "output_cost_per_token": 7.2e-7,
+ "output_cost_per_token": 1.5e-7,
"source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
"supports_function_calling": true,
"supports_response_schema": false
},
- "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": {
- "input_cost_per_token": 7.2e-7,
+ "oci/cohere.command-r-plus-08-2024": {
+ "input_cost_per_token": 0.00000156,
+ "litellm_provider": "oci",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4000,
+ "max_tokens": 4000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00000156,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/google.gemini-2.5-flash": {
+ "input_cost_per_token": 1.5e-7,
+ "litellm_provider": "oci",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536,
+ "mode": "chat",
+ "output_cost_per_token": 6e-7,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_vision": true
+ },
+ "oci/google.gemini-2.5-flash-lite": {
+ "input_cost_per_token": 7.5e-8,
+ "litellm_provider": "oci",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536,
+ "mode": "chat",
+ "output_cost_per_token": 3e-7,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_vision": true
+ },
+ "oci/google.gemini-2.5-pro": {
+ "input_cost_per_token": 0.00000125,
+ "litellm_provider": "oci",
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536,
+ "mode": "chat",
+ "output_cost_per_token": 0.00001,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_vision": true
+ },
+ "oci/meta.llama-3.1-405b-instruct": {
+ "input_cost_per_token": 0.00001068,
+ "litellm_provider": "oci",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4000,
+ "max_tokens": 4000,
+ "mode": "chat",
+ "output_cost_per_token": 0.00001068,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/meta.llama-3.1-70b-instruct": {
+ "input_cost_per_token": 7.2e-7,
+ "litellm_provider": "oci",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4000,
+ "max_tokens": 4000,
+ "mode": "chat",
+ "output_cost_per_token": 7.2e-7,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/meta.llama-3.2-11b-vision-instruct": {
+ "input_cost_per_token": 0.000002,
+ "litellm_provider": "oci",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4000,
+ "max_tokens": 4000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000002,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false,
+ "supports_vision": true
+ },
+ "oci/meta.llama-3.2-90b-vision-instruct": {
+ "input_cost_per_token": 0.000002,
+ "litellm_provider": "oci",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4000,
+ "max_tokens": 4000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000002,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false,
+ "supports_vision": true
+ },
+ "oci/meta.llama-3.3-70b-instruct": {
+ "input_cost_per_token": 7.2e-7,
+ "litellm_provider": "oci",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4000,
+ "max_tokens": 4000,
+ "mode": "chat",
+ "output_cost_per_token": 7.2e-7,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/meta.llama-3.3-70b-instruct-fp8-dynamic": {
+ "input_cost_per_token": 7.2e-7,
+ "litellm_provider": "oci",
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4000,
+ "max_tokens": 4000,
+ "mode": "chat",
+ "output_cost_per_token": 7.2e-7,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": {
+ "input_cost_per_token": 7.2e-7,
"litellm_provider": "oci",
"max_input_tokens": 512000,
"max_output_tokens": 4000,
@@ -22144,6 +22950,66 @@
"supports_function_calling": true,
"supports_response_schema": false
},
+ "oci/xai.grok-4-fast": {
+ "input_cost_per_token": 0.000005,
+ "litellm_provider": "oci",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.000025,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/xai.grok-4.1-fast": {
+ "input_cost_per_token": 0.000005,
+ "litellm_provider": "oci",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.000025,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/xai.grok-4.20": {
+ "input_cost_per_token": 0.000003,
+ "litellm_provider": "oci",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.000015,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/xai.grok-4.20-multi-agent": {
+ "input_cost_per_token": 0.000003,
+ "litellm_provider": "oci",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.000015,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
+ "oci/xai.grok-code-fast-1": {
+ "input_cost_per_token": 0.000005,
+ "litellm_provider": "oci",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 0.000025,
+ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+ "supports_function_calling": true,
+ "supports_response_schema": false
+ },
"ollama/codegeex4": {
"input_cost_per_token": 0,
"litellm_provider": "ollama",
@@ -24341,7 +25207,8 @@
"output_cost_per_token": 8.8e-7,
"supports_function_calling": true,
"supports_reasoning": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_native_structured_output": true
},
"qwen.qwen3-32b-v1:0": {
"input_cost_per_token": 1.5e-7,
@@ -24353,7 +25220,8 @@
"output_cost_per_token": 6e-7,
"supports_function_calling": true,
"supports_reasoning": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_native_structured_output": true
},
"qwen.qwen3-coder-30b-a3b-v1:0": {
"input_cost_per_token": 1.5e-7,
@@ -24365,7 +25233,8 @@
"output_cost_per_token": 6e-7,
"supports_function_calling": true,
"supports_reasoning": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_native_structured_output": true
},
"qwen.qwen3-coder-480b-a35b-v1:0": {
"input_cost_per_token": 2.2e-7,
@@ -24377,7 +25246,8 @@
"output_cost_per_token": 0.0000018,
"supports_function_calling": true,
"supports_reasoning": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_native_structured_output": true
},
"qwen.qwen3-coder-next": {
"input_cost_per_token": 5e-7,
@@ -24401,7 +25271,8 @@
"mode": "chat",
"output_cost_per_token": 0.0000012,
"supports_function_calling": true,
- "supports_system_messages": true
+ "supports_system_messages": true,
+ "supports_native_structured_output": true
},
"qwen.qwen3-vl-235b-a22b": {
"input_cost_per_token": 5.3e-7,
@@ -24413,7 +25284,8 @@
"output_cost_per_token": 0.00000266,
"supports_function_calling": true,
"supports_system_messages": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_native_structured_output": true
},
"replicate/anthropic/claude-3.5-haiku": {
"input_cost_per_token": 0.000001,
@@ -25572,12 +26444,15 @@
"together_ai/openai/gpt-oss-120b": {
"input_cost_per_token": 1.5e-7,
"litellm_provider": "together_ai",
- "max_input_tokens": 128000,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
"mode": "chat",
"output_cost_per_token": 6e-7,
"source": "https://www.together.ai/models/gpt-oss-120b",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
+ "supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
@@ -25647,6 +26522,32 @@
"mode": "chat",
"supports_video_input": true
},
+ "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0": {
+ "cache_creation_input_token_cost": 0.000004125,
+ "cache_read_input_token_cost": 3.3e-7,
+ "input_cost_per_token": 0.0000033,
+ "input_cost_per_token_above_200k_tokens": 0.0000066,
+ "output_cost_per_token_above_200k_tokens": 0.00002475,
+ "cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
+ "cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 64000,
+ "max_tokens": 64000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000165,
+ "supports_assistant_prefill": true,
+ "supports_computer_use": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
+ },
"us.amazon.nova-2-lite-v1:0": {
"cache_read_input_token_cost": 8.25e-8,
"input_cost_per_token": 3.3e-7,
@@ -25876,7 +26777,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"us.anthropic.claude-opus-4-1-20250805-v1:0": {
"cache_creation_input_token_cost": 0.00001875,
@@ -25954,22 +26856,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 159
+ "tool_use_system_prompt_tokens": 159,
+ "supports_native_structured_output": true
},
"us.anthropic.claude-opus-4-6-v1": {
"cache_creation_input_token_cost": 0.000006875,
- "cache_creation_input_token_cost_above_200k_tokens": 0.00001375,
"cache_read_input_token_cost": 5.5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.0000011,
"input_cost_per_token": 0.0000055,
- "input_cost_per_token_above_200k_tokens": 0.000011,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.0000275,
- "output_cost_per_token_above_200k_tokens": 0.00004125,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -25984,7 +26883,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"us.anthropic.claude-sonnet-4-20250514-v1:0": {
"cache_creation_input_token_cost": 0.00000375,
@@ -26044,22 +26944,19 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"us.anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.000004125,
- "cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
"cache_read_input_token_cost": 3.3e-7,
- "cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
"input_cost_per_token": 0.0000033,
- "input_cost_per_token_above_200k_tokens": 0.0000066,
"litellm_provider": "bedrock_converse",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 0.0000165,
- "output_cost_per_token_above_200k_tokens": 0.00002475,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -26074,7 +26971,8 @@
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
- "tool_use_system_prompt_tokens": 346
+ "tool_use_system_prompt_tokens": 346,
+ "supports_native_structured_output": true
},
"us.deepseek.r1-v1:0": {
"input_cost_per_token": 0.00000135,
@@ -27642,6 +28540,27 @@
"supports_tool_choice": true,
"supports_vision": true
},
+ "vertex_ai/claude-haiku-4-5": {
+ "cache_creation_input_token_cost": 0.00000125,
+ "cache_read_input_token_cost": 1e-7,
+ "input_cost_per_token": 0.000001,
+ "litellm_provider": "vertex_ai-anthropic_models",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 8192,
+ "max_tokens": 8192,
+ "mode": "chat",
+ "output_cost_per_token": 0.000005,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/haiku-4-5",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_native_streaming": true,
+ "supports_vision": true
+ },
"vertex_ai/claude-haiku-4-5@20251001": {
"cache_creation_input_token_cost": 0.00000125,
"cache_read_input_token_cost": 1e-7,
@@ -27778,18 +28697,14 @@
},
"vertex_ai/claude-opus-4-6": {
"cache_creation_input_token_cost": 0.00000625,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000125,
"cache_read_input_token_cost": 5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.000001,
"input_cost_per_token": 0.000005,
- "input_cost_per_token_above_200k_tokens": 0.00001,
"litellm_provider": "vertex_ai-anthropic_models",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.000025,
- "output_cost_per_token_above_200k_tokens": 0.0000375,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -27808,18 +28723,14 @@
},
"vertex_ai/claude-opus-4-6@default": {
"cache_creation_input_token_cost": 0.00000625,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000125,
"cache_read_input_token_cost": 5e-7,
- "cache_read_input_token_cost_above_200k_tokens": 0.000001,
"input_cost_per_token": 0.000005,
- "input_cost_per_token_above_200k_tokens": 0.00001,
"litellm_provider": "vertex_ai-anthropic_models",
"max_input_tokens": 1000000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 0.000025,
- "output_cost_per_token_above_200k_tokens": 0.0000375,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
@@ -27947,18 +28858,14 @@
},
"vertex_ai/claude-sonnet-4-6": {
"cache_creation_input_token_cost": 0.00000375,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000075,
"cache_read_input_token_cost": 3e-7,
- "cache_read_input_token_cost_above_200k_tokens": 6e-7,
"input_cost_per_token": 0.000003,
- "input_cost_per_token_above_200k_tokens": 0.000006,
"litellm_provider": "vertex_ai-anthropic_models",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 0.000015,
- "output_cost_per_token_above_200k_tokens": 0.0000225,
"supports_assistant_prefill": true,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -27977,18 +28884,14 @@
},
"vertex_ai/claude-sonnet-4-6@default": {
"cache_creation_input_token_cost": 0.00000375,
- "cache_creation_input_token_cost_above_200k_tokens": 0.0000075,
"cache_read_input_token_cost": 3e-7,
- "cache_read_input_token_cost_above_200k_tokens": 6e-7,
"input_cost_per_token": 0.000003,
- "input_cost_per_token_above_200k_tokens": 0.000006,
"litellm_provider": "vertex_ai-anthropic_models",
- "max_input_tokens": 200000,
+ "max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 0.000015,
- "output_cost_per_token_above_200k_tokens": 0.0000225,
"supports_assistant_prefill": true,
"supports_computer_use": true,
"supports_function_calling": true,
@@ -28099,6 +29002,9 @@
"mode": "chat",
"output_cost_per_token": 0.0000054,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models",
+ "supported_regions": [
+ "us-central1"
+ ],
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_prompt_caching": true,
@@ -28115,7 +29021,7 @@
"output_cost_per_token": 0.0000054,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models",
"supported_regions": [
- "us-west2"
+ "us-central1"
],
"supports_assistant_prefill": true,
"supports_function_calling": true,
@@ -28135,7 +29041,7 @@
"output_cost_per_token_batches": 8.4e-7,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models",
"supported_regions": [
- "us-west2"
+ "global"
],
"supports_assistant_prefill": true,
"supports_function_calling": true,
@@ -28897,7 +29803,8 @@
"output_cost_per_token": 0.000001,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
"supported_regions": [
- "global"
+ "global",
+ "us-south1"
],
"supports_function_calling": true,
"supports_tool_choice": true
@@ -28981,6 +29888,171 @@
"supports_reasoning": true,
"supports_tool_choice": true
},
+ "volcengine/doubao-seed-2-0-code-preview-260215": {
+ "litellm_provider": "volcengine",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "source": "https://www.volcengine.com/docs/82379/1330310",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": false,
+ "supports_vision": true,
+ "tiered_pricing": [
+ {
+ "input_cost_per_token": 4.6e-7,
+ "output_cost_per_token": 0.0000023,
+ "range": [
+ 0,
+ 32000
+ ]
+ },
+ {
+ "input_cost_per_token": 7e-7,
+ "output_cost_per_token": 0.0000035,
+ "range": [
+ 32000,
+ 128000
+ ]
+ },
+ {
+ "input_cost_per_token": 0.0000014,
+ "output_cost_per_token": 0.000007,
+ "range": [
+ 128000,
+ 256000
+ ]
+ }
+ ]
+ },
+ "volcengine/doubao-seed-2-0-lite-260215": {
+ "litellm_provider": "volcengine",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "source": "https://www.volcengine.com/docs/82379/1330310",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": false,
+ "supports_vision": true,
+ "tiered_pricing": [
+ {
+ "input_cost_per_token": 8.7e-8,
+ "output_cost_per_token": 5.2e-7,
+ "range": [
+ 0,
+ 32000
+ ]
+ },
+ {
+ "input_cost_per_token": 1.3e-7,
+ "output_cost_per_token": 7.8e-7,
+ "range": [
+ 32000,
+ 128000
+ ]
+ },
+ {
+ "input_cost_per_token": 2.6e-7,
+ "output_cost_per_token": 0.0000016,
+ "range": [
+ 128000,
+ 256000
+ ]
+ }
+ ]
+ },
+ "volcengine/doubao-seed-2-0-mini-260215": {
+ "litellm_provider": "volcengine",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "source": "https://www.volcengine.com/docs/82379/1330310",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": false,
+ "supports_vision": true,
+ "tiered_pricing": [
+ {
+ "input_cost_per_token": 2.9e-8,
+ "output_cost_per_token": 2.9e-7,
+ "range": [
+ 0,
+ 32000
+ ]
+ },
+ {
+ "input_cost_per_token": 5.8e-8,
+ "output_cost_per_token": 5.8e-7,
+ "range": [
+ 32000,
+ 128000
+ ]
+ },
+ {
+ "input_cost_per_token": 1.2e-7,
+ "output_cost_per_token": 0.0000012,
+ "range": [
+ 128000,
+ 256000
+ ]
+ }
+ ]
+ },
+ "volcengine/doubao-seed-2-0-pro-260215": {
+ "litellm_provider": "volcengine",
+ "max_input_tokens": 256000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "source": "https://www.volcengine.com/docs/82379/1330310",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": false,
+ "supports_vision": true,
+ "tiered_pricing": [
+ {
+ "input_cost_per_token": 4.6e-7,
+ "output_cost_per_token": 0.0000023,
+ "range": [
+ 0,
+ 32000
+ ]
+ },
+ {
+ "input_cost_per_token": 7e-7,
+ "output_cost_per_token": 0.0000035,
+ "range": [
+ 32000,
+ 128000
+ ]
+ },
+ {
+ "input_cost_per_token": 0.0000014,
+ "output_cost_per_token": 0.000007,
+ "range": [
+ 128000,
+ 256000
+ ]
+ }
+ ]
+ },
+ "wandb/MiniMaxAI/MiniMax-M2.5": {
+ "max_tokens": 197000,
+ "max_input_tokens": 197000,
+ "max_output_tokens": 197000,
+ "input_cost_per_token": 3e-7,
+ "output_cost_per_token": 0.0000012,
+ "litellm_provider": "wandb",
+ "mode": "chat",
+ "source": "https://wandb.ai/inference/coreweave/cw_MiniMaxAI_MiniMax-M2.5",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true
+ },
"wandb/Qwen/Qwen3-235B-A22B-Instruct-2507": {
"max_tokens": 262144,
"max_input_tokens": 262144,
@@ -29080,6 +30152,21 @@
"litellm_provider": "wandb",
"mode": "chat"
},
+ "wandb/moonshotai/Kimi-K2.5": {
+ "max_tokens": 262144,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "cache_read_input_token_cost": 1e-7,
+ "input_cost_per_token": 6e-7,
+ "output_cost_per_token": 0.000003,
+ "litellm_provider": "wandb",
+ "mode": "chat",
+ "source": "https://wandb.ai/inference/coreweave/cw_moonshotai_Kimi-K2.5",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_vision": true
+ },
"wandb/openai/gpt-oss-120b": {
"max_tokens": 131072,
"max_input_tokens": 131072,
@@ -29902,6 +30989,53 @@
"supports_tool_choice": true,
"supports_web_search": true
},
+ "xai/grok-4.20-beta-0309-non-reasoning": {
+ "cache_read_input_token_cost": 2e-7,
+ "input_cost_per_token": 0.000002,
+ "litellm_provider": "xai",
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 2000000,
+ "max_tokens": 2000000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000006,
+ "source": "https://docs.x.ai/docs/models",
+ "supports_function_calling": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_web_search": true
+ },
+ "xai/grok-4.20-beta-0309-reasoning": {
+ "cache_read_input_token_cost": 2e-7,
+ "input_cost_per_token": 0.000002,
+ "litellm_provider": "xai",
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 2000000,
+ "max_tokens": 2000000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000006,
+ "source": "https://docs.x.ai/docs/models",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_web_search": true
+ },
+ "xai/grok-4.20-multi-agent-beta-0309": {
+ "cache_read_input_token_cost": 2e-7,
+ "input_cost_per_token": 0.000002,
+ "litellm_provider": "xai",
+ "max_input_tokens": 2000000,
+ "max_output_tokens": 2000000,
+ "max_tokens": 2000000,
+ "mode": "chat",
+ "output_cost_per_token": 0.000006,
+ "source": "https://docs.x.ai/docs/models",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "supports_web_search": true
+ },
"xai/grok-beta": {
"input_cost_per_token": 0.000005,
"litellm_provider": "xai",
@@ -29999,6 +31133,20 @@
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
+ "zai.glm-5": {
+ "input_cost_per_token": 0.000001,
+ "litellm_provider": "bedrock_converse",
+ "max_input_tokens": 200000,
+ "max_output_tokens": 128000,
+ "max_tokens": 128000,
+ "mode": "chat",
+ "output_cost_per_token": 0.0000032,
+ "source": "https://aws.amazon.com/bedrock/pricing/",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true
+ },
"zai/glm-4-32b-0414-128k": {
"input_cost_per_token": 1e-7,
"output_cost_per_token": 1e-7,
diff --git a/cecli/tools/utils/base_tool.py b/cecli/tools/utils/base_tool.py
index 927b65e8cf3..ddb2a8bcebc 100644
--- a/cecli/tools/utils/base_tool.py
+++ b/cecli/tools/utils/base_tool.py
@@ -82,7 +82,9 @@ def process_response(cls, coder, params):
"This request is denied to prevent repeated operations."
)
cls.on_duplicate_request(coder, **params)
- return handle_tool_error(coder, tool_name, ValueError(error_msg))
+ return handle_tool_error(
+ coder, tool_name, ValueError(error_msg), add_traceback=False
+ )
# Add current invocation to history (keeping only last 3)
cls._invocations[tool_name].append((current_params_tuple, params))
diff --git a/cecli/website/docs/config/agent-mode.md b/cecli/website/docs/config/agent-mode.md
index 45ed44276e3..7ff5465f65b 100644
--- a/cecli/website/docs/config/agent-mode.md
+++ b/cecli/website/docs/config/agent-mode.md
@@ -52,6 +52,7 @@ Agent Mode uses a centralized local tool registry that manages all available too
- **Git Tools**: `GitDiff`, `GitLog`, `GitShow`, `GitStatus`
- **Utility Tools**: `UpdateTodoList`, `ListChanges`, `UndoChange`, `Finished`
- **Skill Management**: `LoadSkill`, `RemoveSkill`
+- **Eval Management**: `RunEvals`
#### Enhanced Context Management
diff --git a/cecli/website/docs/config/skills.md b/cecli/website/docs/config/skills.md
index 9d0a6261eaa..171ec817191 100644
--- a/cecli/website/docs/config/skills.md
+++ b/cecli/website/docs/config/skills.md
@@ -20,9 +20,11 @@ skill-name/
├── scripts/ # Executable scripts
│ └── example-setup.sh # Setup script
│ └── example-deploy.py # Deployment script
-└── assets/ # Binary assets (images, config files, etc.)
- └── example-diagram.png # Architecture diagram
- └── example-config.json # Configuration file
+├── assets/ # Binary assets (images, config files, etc.)
+│ └── example-diagram.png # Architecture diagram
+│ └── example-config.json # Configuration file
+└── evals/
+ └── evals.json # Evaluation tests
```
## SKILL.md Format
@@ -66,6 +68,91 @@ def process_data(data):
return result
```
+## Evals Format (`evals.json`)
+
+The `evals/` directory contains `evals.json` files for testing skill performance. These evaluations help ensure that skills behave as expected and provide a way to measure their accuracy and effectiveness. These evaluation files can be executed using the `RunEvals` tool in Agent Mode.
+
+`evals.json` files can be in one of two formats:
+
+### Standard Format
+
+The standard format includes metadata about the skill and a list of evaluation cases.
+
+**Structure:**
+
+```json
+{
+ "skill_name": "your-skill-name",
+ "evals": [
+ {
+ "id": 1,
+ "prompt": "A user query to test the skill.",
+ "expected_output": "A description of the ideal response from the AI.",
+ "assertions": [
+ "A list of specific points or phrases that must be in the output.",
+ "Another assertion to check for.",
+ "And so on..."
+ ],
+ "files": [
+ "path/to/test/file1.txt",
+ "path/to/test/file2.py"
+ ]
+ }
+ ]
+}
+```
+
+- **`skill_name`**: The name of the skill being evaluated.
+- **`evals`**: An array of evaluation objects.
+ - **`id`**: A unique identifier for the test case.
+ - **`prompt`**: The input prompt to send to the AI.
+ - **`expected_output`**: A natural language description of what the ideal response should contain.
+ - **`assertions`**: A list of specific, verifiable statements that must be true about the AI's output. These are used for automated checking.
+ - **`files`**: A list of file paths to be included in the context when running the evaluation.
+
+### Assertion-Based Format
+
+This format is a direct array of evaluation cases, each with structured assertions. This is useful for more granular, automated testing.
+
+**Structure:**
+
+```json
+[
+ {
+ "id": "billing-charge-error",
+ "description": "Clear billing question about a charge",
+ "input": "I was charged $99 but I only signed up for the $49 plan.",
+ "assertions": [
+ { "type": "exact", "value": "BILLING" }
+ ]
+ },
+ {
+ "id": "technical-api-error",
+ "description": "API authentication failure is TECHNICAL",
+ "input": "I keep getting a 403 error when I try to authenticate.",
+ "assertions": [
+ { "type": "exact", "value": "TECHNICAL" }
+ ]
+ },
+ {
+ "id": "no-extra-text",
+ "description": "Output should only be the label — nothing else",
+ "input": "Where can I find my invoices?",
+ "assertions": [
+ { "type": "contains", "value": "BILLING" },
+ { "type": "max_length", "value": 10 }
+ ]
+ }
+]
+```
+
+- **`id`**: A unique string identifier for the test case.
+- **`description`**: A brief explanation of the test case's purpose.
+- **`input`**: The input prompt to send to the AI.
+- **`assertions`**: An array of assertion objects for automated validation.
+ - **`type`**: The type of assertion (e.g., `exact`, `contains`, `max_length`).
+ - **`value`**: The value to check against.
+
## Skill Configuration
Skills are configured through the `agent-config` parameter in the YAML configuration file. The following options are available:
@@ -105,13 +192,14 @@ To create a custom skill:
1. Create a skill directory with the skill name
2. Add `SKILL.md` with YAML frontmatter and instructions
3. Add reference materials in `references/` directory
-4. Add executable scripts in `scripts/` directory
+4. Add executable scripts in `scripts/` directory
5. Add binary assets in `assets/` directory
-6. Test the skill by adding it to your configuration file:
+6. Add evaluation tests in `evals/` directory to test skill performance
+7. Test the skill by adding it to your configuration file:
Example skill creation:
```bash
-mkdir -p ~/skills/my-custom-skill/{references,scripts,assets}
+mkdir -p ~/skills/my-custom-skill/{references,scripts,assets,evals}
cat > ~/skills/my-custom-skill/SKILL.md << 'EOF'
---
@@ -152,6 +240,25 @@ echo "Setting up my custom skill..."
# Setup commands here
EOF
chmod +x ~/skills/my-custom-skill/scripts/setup.sh
+
+# Add an eval file
+cat > ~/skills/my-custom-skill/evals/evals.json << 'EOF'
+{
+ "skill_name": "my-custom-skill",
+ "evals": [
+ {
+ "id": 1,
+ "prompt": "Test prompt for feature 1",
+ "expected_output": "Expected behavior for feature 1",
+ "assertions": [
+ "Should do this",
+ "Should not do that"
+ ],
+ "files": []
+ }
+ ]
+}
+EOF
```
## Best Practices for Skills
diff --git a/cecli/website/docs/config/workspaces.md b/cecli/website/docs/config/workspaces.md
index a9b744b75e4..fc0fd736c93 100644
--- a/cecli/website/docs/config/workspaces.md
+++ b/cecli/website/docs/config/workspaces.md
@@ -31,6 +31,7 @@ workspaces:
repo: "https://github.com/user/backend.git"
branch: "develop"
use_current_branch: true # Default: true. Set to false to force branch switching on init.
+ ignore: "~/.cecli/backend.ignore" # Optional: Path to a custom ignore file for this project
```
### Multiple Workspaces
diff --git a/tests/helpers/monorepo/test_ignore_logic.py b/tests/helpers/monorepo/test_ignore_logic.py
new file mode 100644
index 00000000000..d7d7393fc38
--- /dev/null
+++ b/tests/helpers/monorepo/test_ignore_logic.py
@@ -0,0 +1,103 @@
+import os
+import shutil
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import git
+
+from cecli.helpers.monorepo.workspace import WorkspaceManager
+from cecli.io import InputOutput
+from cecli.repo import GitRepo
+
+
+class TestIgnoreLogic(unittest.TestCase):
+ def setUp(self):
+ self.test_dir = Path(tempfile.mkdtemp()).resolve()
+ self.old_cwd = os.getcwd()
+ os.chdir(self.test_dir)
+
+ # Setup a dummy source ignore file
+ self.src_ignore = self.test_dir / "my_proj.ignore_src"
+ self.src_ignore.write_text("ignored_file.txt\n*.log\n")
+
+ self.workspace_name = "test_ws"
+ # Use a local path for testing instead of ~/.cecli
+ self.workspace_root = (self.test_dir / "workspaces").resolve()
+ self.workspace_root.mkdir(parents=True, exist_ok=True)
+ self.ws_path = self.workspace_root / self.workspace_name
+
+ self.config = {
+ "name": self.workspace_name,
+ "projects": [
+ {
+ "name": "my_proj",
+ "repo": "https://github.com/example/repo",
+ "ignore": str(self.src_ignore),
+ }
+ ],
+ }
+
+ def tearDown(self):
+ os.chdir(self.old_cwd)
+ if hasattr(self, "test_dir") and self.test_dir.exists():
+ shutil.rmtree(self.test_dir)
+
+ def test_ignore_file_copying(self):
+ # Test that WorkspaceManager.initialize copies the ignore file
+ wm = WorkspaceManager(self.workspace_name, self.config)
+ # Use our test ws_path
+ wm.path = self.ws_path
+
+ with patch("cecli.helpers.monorepo.project.Project.initialize"):
+ wm.initialize()
+
+ dest_ignore = self.ws_path / "my_proj.ignore"
+ self.assertTrue(dest_ignore.exists(), "Ignore file should be copied to workspace root")
+ self.assertEqual(dest_ignore.read_text(), self.src_ignore.read_text())
+
+ def test_repo_ignore_loading(self):
+ # Test that GitRepo loads the copied ignore file
+ wm = WorkspaceManager(self.workspace_name, self.config)
+ wm.path = self.ws_path
+
+ with patch("cecli.helpers.monorepo.project.Project.initialize"):
+ wm.initialize()
+
+ io = InputOutput()
+ # Create a dummy file in the workspace to trigger detection
+ dummy_file = self.ws_path / "my_proj" / "main" / "some_file.txt"
+ dummy_file.parent.mkdir(parents=True, exist_ok=True)
+ dummy_file.touch()
+
+ # Mock git.Repo to avoid FileNotFoundError in GitRepo.__init__
+ mock_repo = MagicMock(spec=git.Repo)
+ mock_repo.working_dir = str(self.ws_path)
+ mock_repo.__enter__.return_value = mock_repo
+
+ # Patch _detect_workspace_path to return our test workspace path
+ with patch("git.Repo", return_value=mock_repo):
+ with patch("cecli.repo.GitRepo._detect_workspace_path", return_value=self.ws_path):
+ with patch("cecli.repo.GitRepo.init_repo"):
+ with patch(
+ "cecli.helpers.monorepo.config.load_workspace_config",
+ return_value=self.config,
+ ):
+ repo = GitRepo(io, fnames=[str(dummy_file)], git_dname=None)
+
+ self.assertTrue(repo.is_workspace)
+ self.assertEqual(Path(repo.workspace_path), self.ws_path)
+
+ # Verify ignore spec is loaded
+ repo._refresh_workspace_ignores()
+ self.assertIn("my_proj", repo.workspace_ignore_specs)
+
+ spec = repo.workspace_ignore_specs["my_proj"]
+ self.assertTrue(spec.match_file("ignored_file.txt"))
+ self.assertTrue(spec.match_file("test.log"))
+ self.assertFalse(spec.match_file("keep.txt"))
+
+
+if __name__ == "__main__":
+ unittest.main()