From 0cda1c3a3a39b55b104dc216c3667cc8b65215d9 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 26 Nov 2025 14:01:19 -0800
Subject: [PATCH 01/17] fix: Add BadGatewayError to exceptions list for retry.

Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro)
---
 aider/exceptions.py | 229 ++++++++++++++++++++++----------------
 1 file changed, 115 insertions(+), 114 deletions(-)

diff --git a/aider/exceptions.py b/aider/exceptions.py
index 0348df5b4b0..ade8a491c22 100644
--- a/aider/exceptions.py
+++ b/aider/exceptions.py
@@ -1,114 +1,115 @@
-from dataclasses import dataclass
-
-from aider.dump import dump  # noqa: F401
-
-
-@dataclass
-class ExInfo:
-    name: str
-    retry: bool
-    description: str
-
-
-EXCEPTIONS = [
-    ExInfo("APIConnectionError", True, None),
-    ExInfo("APIError", True, None),
-    ExInfo("APIResponseValidationError", True, None),
-    ExInfo(
-        "AuthenticationError",
-        False,
-        "The API provider is not able to authenticate you. Check your API key.",
-    ),
-    ExInfo("AzureOpenAIError", True, None),
-    ExInfo("BadRequestError", False, None),
-    ExInfo("BudgetExceededError", True, None),
-    ExInfo(
-        "ContentPolicyViolationError",
-        True,
-        "The API provider has refused the request due to a safety policy about the content.",
-    ),
-    ExInfo("ContextWindowExceededError", False, None),  # special case handled in base_coder
-    ExInfo("ErrorEventError", True, None),
-    ExInfo("ImageFetchError", True, "The API cannot fetch an image"),
-    ExInfo("InternalServerError", True, "The API provider's servers are down or overloaded."),
-    ExInfo("InvalidRequestError", True, None),
-    ExInfo("JSONSchemaValidationError", True, None),
-    ExInfo("NotFoundError", False, None),
-    ExInfo("OpenAIError", True, None),
-    ExInfo(
-        "RateLimitError",
-        True,
-        "The API provider has rate limited you. Try again later or check your quotas.",
-    ),
-    ExInfo("RouterRateLimitError", True, None),
-    ExInfo("ServiceUnavailableError", True, "The API provider's servers are down or overloaded."),
-    ExInfo("UnprocessableEntityError", True, None),
-    ExInfo("UnsupportedParamsError", True, None),
-    ExInfo(
-        "Timeout",
-        True,
-        "The API provider timed out without returning a response. They may be down or overloaded.",
-    ),
-]
-
-
-class LiteLLMExceptions:
-    exceptions = dict()
-    exception_info = {exi.name: exi for exi in EXCEPTIONS}
-
-    def __init__(self):
-        self._load()
-
-    def _load(self, strict=False):
-        import litellm
-
-        for var in dir(litellm):
-            if var.endswith("Error"):
-                if var not in self.exception_info:
-                    raise ValueError(f"{var} is in litellm but not in aider's exceptions list")
-
-        for var in self.exception_info:
-            ex = getattr(litellm, var, "default")
-
-            if ex != "default":
-                if not issubclass(ex, BaseException):
-                    continue
-
-                self.exceptions[ex] = self.exception_info[var]
-
-    def exceptions_tuple(self):
-        return tuple(self.exceptions)
-
-    def get_ex_info(self, ex):
-        """Return the ExInfo for a given exception instance"""
-        import litellm
-
-        if ex.__class__ is litellm.APIConnectionError:
-            if "google.auth" in str(ex):
-                return ExInfo(
-                    "APIConnectionError", False, "You need to: pip install google-generativeai"
-                )
-            if "boto3" in str(ex):
-                return ExInfo("APIConnectionError", False, "You need to: pip install boto3")
-            if "OpenrouterException" in str(ex) and "'choices'" in str(ex):
-                return ExInfo(
-                    "APIConnectionError",
-                    True,
-                    (
-                        "OpenRouter or the upstream API provider is down, overloaded or rate"
-                        " limiting your requests."
-                    ),
-                )
-
-        # Check for specific non-retryable APIError cases like insufficient credits
-        if ex.__class__ is litellm.APIError:
-            err_str = str(ex).lower()
-            if "insufficient credits" in err_str and '"code":402' in err_str:
-                return ExInfo(
-                    "APIError",
-                    False,
-                    "Insufficient credits with the API provider. Please add credits.",
-                )
-            # Fall through to default APIError handling if not the specific credits error
-
-        return self.exceptions.get(ex.__class__, ExInfo(None, None, None))
+from dataclasses import dataclass
+
+from aider.dump import dump  # noqa: F401
+
+
+@dataclass
+class ExInfo:
+    name: str
+    retry: bool
+    description: str
+
+
+EXCEPTIONS = [
+    ExInfo("APIConnectionError", True, None),
+    ExInfo("APIError", True, None),
+    ExInfo("APIResponseValidationError", True, None),
+    ExInfo(
+        "AuthenticationError",
+        False,
+        "The API provider is not able to authenticate you. Check your API key.",
+    ),
+    ExInfo("AzureOpenAIError", True, None),
+    ExInfo("BadGatewayError", True, None),
+    ExInfo("BadRequestError", False, None),
+    ExInfo("BudgetExceededError", True, None),
+    ExInfo(
+        "ContentPolicyViolationError",
+        True,
+        "The API provider has refused the request due to a safety policy about the content.",
+    ),
+    ExInfo("ContextWindowExceededError", False, None),  # special case handled in base_coder
+    ExInfo("ErrorEventError", True, None),
+    ExInfo("ImageFetchError", True, "The API cannot fetch an image"),
+    ExInfo("InternalServerError", True, "The API provider's servers are down or overloaded."),
+    ExInfo("InvalidRequestError", True, None),
+    ExInfo("JSONSchemaValidationError", True, None),
+    ExInfo("NotFoundError", False, None),
+    ExInfo("OpenAIError", True, None),
+    ExInfo(
+        "RateLimitError",
+        True,
+        "The API provider has rate limited you. Try again later or check your quotas.",
+    ),
+    ExInfo("RouterRateLimitError", True, None),
+    ExInfo("ServiceUnavailableError", True, "The API provider's servers are down or overloaded."),
+    ExInfo("UnprocessableEntityError", True, None),
+    ExInfo("UnsupportedParamsError", True, None),
+    ExInfo(
+        "Timeout",
+        True,
+        "The API provider timed out without returning a response. They may be down or overloaded.",
+    ),
+]
+
+
+class LiteLLMExceptions:
+    exceptions = dict()
+    exception_info = {exi.name: exi for exi in EXCEPTIONS}
+
+    def __init__(self):
+        self._load()
+
+    def _load(self, strict=False):
+        import litellm
+
+        for var in dir(litellm):
+            if var.endswith("Error"):
+                if var not in self.exception_info:
+                    raise ValueError(f"{var} is in litellm but not in aider's exceptions list")
+
+        for var in self.exception_info:
+            ex = getattr(litellm, var, "default")
+
+            if ex != "default":
+                if not issubclass(ex, BaseException):
+                    continue
+
+                self.exceptions[ex] = self.exception_info[var]
+
+    def exceptions_tuple(self):
+        return tuple(self.exceptions)
+
+    def get_ex_info(self, ex):
+        """Return the ExInfo for a given exception instance"""
+        import litellm
+
+        if ex.__class__ is litellm.APIConnectionError:
+            if "google.auth" in str(ex):
+                return ExInfo(
+                    "APIConnectionError", False, "You need to: pip install google-generativeai"
+                )
+            if "boto3" in str(ex):
+                return ExInfo("APIConnectionError", False, "You need to: pip install boto3")
+            if "OpenrouterException" in str(ex) and "'choices'" in str(ex):
+                return ExInfo(
+                    "APIConnectionError",
+                    True,
+                    (
+                        "OpenRouter or the upstream API provider is down, overloaded or rate"
+                        " limiting your requests."
+                    ),
+                )
+
+        # Check for specific non-retryable APIError cases like insufficient credits
+        if ex.__class__ is litellm.APIError:
+            err_str = str(ex).lower()
+            if "insufficient credits" in err_str and '"code":402' in err_str:
+                return ExInfo(
+                    "APIError",
+                    False,
+                    "Insufficient credits with the API provider. Please add credits.",
+                )
+            # Fall through to default APIError handling if not the specific credits error
+
+        return self.exceptions.get(ex.__class__, ExInfo(None, None, None))
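Patch 01 above only flips the retry flag: BadGatewayError (litellm's exception for HTTP 502 responses) now maps to retry=True in aider's exception table. The retry itself happens in the send loop that consults this table. The sketch below is a minimal illustration of that consumption path, modeled on the shape of Model.simple_send_with_retries() in aider/models.py (its pre-patch form appears in patch 03 further down); the function name, model string and delay values are placeholders for illustration, not code from this series.

    # Minimal sketch of how ExInfo.retry is consumed; not part of these patches.
    import time

    import litellm

    from aider.exceptions import LiteLLMExceptions


    def send_with_retries(messages, model="gpt-4o", retry_delay=0.125, max_delay=60):
        litellm_ex = LiteLLMExceptions()
        while True:
            try:
                return litellm.completion(model=model, messages=messages)
            except litellm_ex.exceptions_tuple() as err:
                ex_info = litellm_ex.get_ex_info(err)
                # With patch 01, a 502 (BadGatewayError) lands here with
                # ex_info.retry == True instead of aborting the request.
                if not ex_info.retry or retry_delay > max_delay:
                    raise
                print(f"{err}\nRetrying in {retry_delay:.1f} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2

The test added in patch 02 below can be run on its own with, for example, pytest tests/basic/test_exceptions.py -k bad_gateway, assuming the repository's usual pytest setup.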
From 0a45d42079c59f49b7f9897dc8874437c9ae361a Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 26 Nov 2025 14:02:12 -0800
Subject: [PATCH 02/17] feat: Add BadGatewayError handling and test case

Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro)
---
 tests/basic/test_exceptions.py | 181 ++++++++++++++++++---------
 1 file changed, 97 insertions(+), 84 deletions(-)

diff --git a/tests/basic/test_exceptions.py b/tests/basic/test_exceptions.py
index 5f9c095f8b6..821bf249956 100644
--- a/tests/basic/test_exceptions.py
+++ b/tests/basic/test_exceptions.py
@@ -1,84 +1,97 @@
-from aider.exceptions import ExInfo, LiteLLMExceptions
-
-
-def test_litellm_exceptions_load():
-    """Test that LiteLLMExceptions loads without errors"""
-    ex = LiteLLMExceptions()
-    assert len(ex.exceptions) > 0
-
-
-def test_exceptions_tuple():
-    """Test that exceptions_tuple returns a non-empty tuple"""
-    ex = LiteLLMExceptions()
-    assert isinstance(ex.exceptions_tuple(), tuple)
-    assert len(ex.exceptions_tuple()) > 0
-
-
-def test_get_ex_info():
-    """Test get_ex_info returns correct ExInfo"""
-    ex = LiteLLMExceptions()
-
-    # Test with a known exception type
-    from litellm import AuthenticationError
-
-    auth_error = AuthenticationError(
-        message="Invalid API key", llm_provider="openai", model="gpt-4"
-    )
-    ex_info = ex.get_ex_info(auth_error)
-    assert isinstance(ex_info, ExInfo)
-    assert ex_info.name == "AuthenticationError"
-    assert ex_info.retry is False
-    assert "API key" in ex_info.description
-
-    # Test with unknown exception type
-    class UnknownError(Exception):
-        pass
-
-    unknown = UnknownError()
-    ex_info = ex.get_ex_info(unknown)
-    assert isinstance(ex_info, ExInfo)
-    assert ex_info.name is None
-    assert ex_info.retry is None
-    assert ex_info.description is None
-
-
-def test_rate_limit_error():
-    """Test specific handling of RateLimitError"""
-    ex = LiteLLMExceptions()
-    from litellm import RateLimitError
-
-    rate_error = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-4")
-    ex_info = ex.get_ex_info(rate_error)
-    assert ex_info.retry is True
-    assert "rate limited" in ex_info.description.lower()
-
-
-def test_context_window_error():
-    """Test specific handling of ContextWindowExceededError"""
-    ex = LiteLLMExceptions()
-    from litellm import ContextWindowExceededError
-
-    ctx_error = ContextWindowExceededError(
-        message="Context length exceeded", model="gpt-4", llm_provider="openai"
-    )
-    ex_info = ex.get_ex_info(ctx_error)
-    assert ex_info.retry is False
-
-
-def test_openrouter_error():
-    """Test specific handling of OpenRouter API errors"""
-    ex = LiteLLMExceptions()
-    from litellm import APIConnectionError
-
-    # Create an APIConnectionError with OpenrouterException message
-    openrouter_error = APIConnectionError(
-        message="APIConnectionError: OpenrouterException - 'choices'",
-        model="openrouter/model",
-        llm_provider="openrouter",
-    )
-
-    ex_info = ex.get_ex_info(openrouter_error)
-    assert ex_info.retry is True
-    assert "OpenRouter" in ex_info.description
-    assert "overloaded" in ex_info.description
-    assert "rate" in ex_info.description
+from aider.exceptions import ExInfo, LiteLLMExceptions
+
+
+def test_litellm_exceptions_load():
+    """Test that LiteLLMExceptions loads without errors"""
+    ex = LiteLLMExceptions()
+    assert len(ex.exceptions) > 0
+
+
+def test_exceptions_tuple():
+    """Test that exceptions_tuple returns a non-empty tuple"""
+    ex = LiteLLMExceptions()
+    assert isinstance(ex.exceptions_tuple(), tuple)
+    assert len(ex.exceptions_tuple()) > 0
+
+
+def test_get_ex_info():
+    """Test get_ex_info returns correct ExInfo"""
+    ex = LiteLLMExceptions()
+
+    # Test with a known exception type
+    from litellm import AuthenticationError
+
+    auth_error = AuthenticationError(
+        message="Invalid API key", llm_provider="openai", model="gpt-4"
+    )
+    ex_info = ex.get_ex_info(auth_error)
+    assert isinstance(ex_info, ExInfo)
+    assert ex_info.name == "AuthenticationError"
+    assert ex_info.retry is False
+    assert "API key" in ex_info.description
+
+    # Test with unknown exception type
+    class UnknownError(Exception):
+        pass
+
+    unknown = UnknownError()
+    ex_info = ex.get_ex_info(unknown)
+    assert isinstance(ex_info, ExInfo)
+    assert ex_info.name is None
+    assert ex_info.retry is None
+    assert ex_info.description is None
+
+
+def test_rate_limit_error():
+    """Test specific handling of RateLimitError"""
+    ex = LiteLLMExceptions()
+    from litellm import RateLimitError
+
+    rate_error = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-4")
+    ex_info = ex.get_ex_info(rate_error)
+    assert ex_info.retry is True
+    assert "rate limited" in ex_info.description.lower()
+
+
+def test_bad_gateway_error():
+    """Test specific handling of BadGatewayError"""
+    ex = LiteLLMExceptions()
+    from litellm import BadGatewayError
+
+    bad_gateway_error = BadGatewayError(
+        message="Bad Gateway", llm_provider="openai", model="gpt-4"
+    )
+    ex_info = ex.get_ex_info(bad_gateway_error)
+    assert ex_info.retry is True
+    assert ex_info.name == "BadGatewayError"
+
+
+def test_context_window_error():
+    """Test specific handling of ContextWindowExceededError"""
+    ex = LiteLLMExceptions()
+    from litellm import ContextWindowExceededError
+
+    ctx_error = ContextWindowExceededError(
+        message="Context length exceeded", model="gpt-4", llm_provider="openai"
+    )
+    ex_info = ex.get_ex_info(ctx_error)
+    assert ex_info.retry is False
+
+
+def test_openrouter_error():
+    """Test specific handling of OpenRouter API errors"""
+    ex = LiteLLMExceptions()
+    from litellm import APIConnectionError
+
+    # Create an APIConnectionError with OpenrouterException message
+    openrouter_error = APIConnectionError(
+        message="APIConnectionError: OpenrouterException - 'choices'",
+        model="openrouter/model",
+        llm_provider="openrouter",
+    )
+
+    ex_info = ex.get_ex_info(openrouter_error)
+    assert ex_info.retry is True
+    assert "OpenRouter" in ex_info.description
+    assert "overloaded" in ex_info.description
+    assert "rate" in ex_info.description

From 129bf85f2a047c7250a397b4b824a4c3b680c39c Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 26 Nov 2025 14:07:38 -0800
Subject: [PATCH 03/17] fix: Deduplicate tools before sending to the model to avoid errors.

Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro) --- aider/models.py | 2637 ++++++++++++++++++++++++----------------------- 1 file changed, 1324 insertions(+), 1313 deletions(-) diff --git a/aider/models.py b/aider/models.py index 4c09161d02a..d86cd3c82a0 100644 --- a/aider/models.py +++ b/aider/models.py @@ -1,1313 +1,1324 @@ -import asyncio -import difflib -import hashlib -import importlib.resources -import json -import math -import os -import platform -import sys -import time -from dataclasses import dataclass, fields -from pathlib import Path -from typing import Optional, Union - -import json5 -import yaml -from PIL import Image - -from aider import __version__ -from aider.dump import dump # noqa: F401 -from aider.llm import litellm -from aider.openrouter import OpenRouterModelManager -from aider.sendchat import ensure_alternating_roles, sanity_check_messages -from aider.utils import check_pip_install_extra - -RETRY_TIMEOUT = 60 - -request_timeout = 600 - -DEFAULT_MODEL_NAME = "gpt-4o" -ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25" - -OPENAI_MODELS = """ -o1 -o1-preview -o1-mini -o3-mini -gpt-4 -gpt-4o -gpt-4o-2024-05-13 -gpt-4-turbo-preview -gpt-4-0314 -gpt-4-0613 -gpt-4-32k -gpt-4-32k-0314 -gpt-4-32k-0613 -gpt-4-turbo -gpt-4-turbo-2024-04-09 -gpt-4-1106-preview -gpt-4-0125-preview -gpt-4-vision-preview -gpt-4-1106-vision-preview -gpt-4o-mini -gpt-4o-mini-2024-07-18 -gpt-3.5-turbo -gpt-3.5-turbo-0301 -gpt-3.5-turbo-0613 -gpt-3.5-turbo-1106 -gpt-3.5-turbo-0125 -gpt-3.5-turbo-16k -gpt-3.5-turbo-16k-0613 -""" - -OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()] - -ANTHROPIC_MODELS = """ -claude-2 -claude-2.1 -claude-3-haiku-20240307 -claude-3-5-haiku-20241022 -claude-3-opus-20240229 -claude-3-sonnet-20240229 -claude-3-5-sonnet-20240620 -claude-3-5-sonnet-20241022 -claude-sonnet-4-20250514 -claude-opus-4-20250514 -""" - -ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()] - -# Mapping of model aliases to their canonical names -MODEL_ALIASES = { - # Claude models - "sonnet": "anthropic/claude-sonnet-4-20250514", - "haiku": "claude-3-5-haiku-20241022", - "opus": "claude-opus-4-20250514", - # GPT models - "4": "gpt-4-0613", - "4o": "gpt-4o", - "4-turbo": "gpt-4-1106-preview", - "35turbo": "gpt-3.5-turbo", - "35-turbo": "gpt-3.5-turbo", - "3": "gpt-3.5-turbo", - # Other models - "deepseek": "deepseek/deepseek-chat", - "flash": "gemini/gemini-2.5-flash", - "flash-lite": "gemini/gemini-2.5-flash-lite", - "quasar": "openrouter/openrouter/quasar-alpha", - "r1": "deepseek/deepseek-reasoner", - "gemini-2.5-pro": "gemini/gemini-2.5-pro", - "gemini": "gemini/gemini-2.5-pro", - "gemini-exp": "gemini/gemini-2.5-pro-exp-03-25", - "grok3": "xai/grok-3-beta", - "optimus": "openrouter/openrouter/optimus-alpha", -} -# Model metadata loaded from resources and user's files. 
- - -@dataclass -class ModelSettings: - # Model class needs to have each of these as well - name: str - edit_format: str = "whole" - weak_model_name: Optional[str] = None - use_repo_map: bool = False - send_undo_reply: bool = False - lazy: bool = False - overeager: bool = False - reminder: str = "user" - examples_as_sys_msg: bool = False - extra_params: Optional[dict] = None - cache_control: bool = False - caches_by_default: bool = False - use_system_prompt: bool = True - use_temperature: Union[bool, float] = True - streaming: bool = True - editor_model_name: Optional[str] = None - editor_edit_format: Optional[str] = None - reasoning_tag: Optional[str] = None - remove_reasoning: Optional[str] = None # Deprecated alias for reasoning_tag - system_prompt_prefix: Optional[str] = None - accepts_settings: Optional[list] = None - - -# Load model settings from package resource -MODEL_SETTINGS = [] -with importlib.resources.open_text("aider.resources", "model-settings.yml") as f: - model_settings_list = yaml.safe_load(f) - for model_settings_dict in model_settings_list: - MODEL_SETTINGS.append(ModelSettings(**model_settings_dict)) - - -class ModelInfoManager: - MODEL_INFO_URL = ( - "https://raw.githubusercontent.com/BerriAI/litellm/main/" - "model_prices_and_context_window.json" - ) - CACHE_TTL = 60 * 60 * 24 # 24 hours - - def __init__(self): - self.cache_dir = Path.home() / ".aider" / "caches" - self.cache_file = self.cache_dir / "model_prices_and_context_window.json" - self.content = None - self.local_model_metadata = {} - self.verify_ssl = True - self._cache_loaded = False - - # Manager for the cached OpenRouter model database - self.openrouter_manager = OpenRouterModelManager() - - def set_verify_ssl(self, verify_ssl): - self.verify_ssl = verify_ssl - if hasattr(self, "openrouter_manager"): - self.openrouter_manager.set_verify_ssl(verify_ssl) - - def _load_cache(self): - if self._cache_loaded: - return - - try: - self.cache_dir.mkdir(parents=True, exist_ok=True) - if self.cache_file.exists(): - cache_age = time.time() - self.cache_file.stat().st_mtime - if cache_age < self.CACHE_TTL: - try: - self.content = json.loads(self.cache_file.read_text()) - except json.JSONDecodeError: - # If the cache file is corrupted, treat it as missing - self.content = None - except OSError: - pass - - self._cache_loaded = True - - def _update_cache(self): - try: - import requests - - # Respect the --no-verify-ssl switch - response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl) - if response.status_code == 200: - self.content = response.json() - try: - self.cache_file.write_text(json.dumps(self.content, indent=4)) - except OSError: - pass - except Exception as ex: - print(str(ex)) - try: - # Save empty dict to cache file on failure - self.cache_file.write_text("{}") - except OSError: - pass - - def get_model_from_cached_json_db(self, model): - data = self.local_model_metadata.get(model) - if data: - return data - - # Ensure cache is loaded before checking content - self._load_cache() - - if not self.content: - self._update_cache() - - if not self.content: - return dict() - - info = self.content.get(model, dict()) - if info: - return info - - pieces = model.split("/") - if len(pieces) == 2: - info = self.content.get(pieces[1]) - if info and info.get("litellm_provider") == pieces[0]: - return info - - return dict() - - def get_model_info(self, model): - cached_info = self.get_model_from_cached_json_db(model) - - litellm_info = None - if litellm._lazy_module or not cached_info: - try: - 
litellm_info = litellm.get_model_info(model) - except Exception as ex: - if "model_prices_and_context_window.json" not in str(ex): - print(str(ex)) - - if litellm_info: - return litellm_info - - if not cached_info and model.startswith("openrouter/"): - # First try using the locally cached OpenRouter model database - openrouter_info = self.openrouter_manager.get_model_info(model) - if openrouter_info: - return openrouter_info - - # Fallback to legacy web-scraping if the API cache does not contain the model - openrouter_info = self.fetch_openrouter_model_info(model) - if openrouter_info: - return openrouter_info - - return cached_info - - def fetch_openrouter_model_info(self, model): - """ - Fetch model info by scraping the openrouter model page. - Expected URL: https://openrouter.ai/ - Example: openrouter/qwen/qwen-2.5-72b-instruct:free - Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, - input_cost_per_token, output_cost_per_token. - """ - url_part = model[len("openrouter/") :] - url = "https://openrouter.ai/" + url_part - try: - import requests - - response = requests.get(url, timeout=5, verify=self.verify_ssl) - if response.status_code != 200: - return {} - html = response.text - import re - - if re.search( - rf"The model\s*.*{re.escape(url_part)}.* is not available", html, re.IGNORECASE - ): - print(f"\033[91mError: Model '{url_part}' is not available\033[0m") - return {} - text = re.sub(r"<[^>]+>", " ", html) - context_match = re.search(r"([\d,]+)\s*context", text) - if context_match: - context_str = context_match.group(1).replace(",", "") - context_size = int(context_str) - else: - context_size = None - input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE) - output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE) - input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None - output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None - if context_size is None or input_cost is None or output_cost is None: - return {} - params = { - "max_input_tokens": context_size, - "max_tokens": context_size, - "max_output_tokens": context_size, - "input_cost_per_token": input_cost, - "output_cost_per_token": output_cost, - } - return params - except Exception as e: - print("Error fetching openrouter info:", str(e)) - return {} - - -model_info_manager = ModelInfoManager() - - -class Model(ModelSettings): - def __init__( - self, model, weak_model=None, editor_model=None, editor_edit_format=None, verbose=False - ): - # Map any alias to its canonical name - model = MODEL_ALIASES.get(model, model) - - self.name = model - self.verbose = verbose - - self.max_chat_history_tokens = 1024 - self.weak_model = None - self.editor_model = None - - # Find the extra settings - self.extra_model_settings = next( - (ms for ms in MODEL_SETTINGS if ms.name == "aider/extra_params"), None - ) - - self.info = self.get_model_info(model) - - # Are all needed keys/params available? 
- res = self.validate_environment() - self.missing_keys = res.get("missing_keys") - self.keys_in_environment = res.get("keys_in_environment") - - max_input_tokens = self.info.get("max_input_tokens") or 0 - # Calculate max_chat_history_tokens as 1/16th of max_input_tokens, - # with minimum 1k and maximum 8k - self.max_chat_history_tokens = min(max(max_input_tokens / 16, 1024), 8192) - - self.configure_model_settings(model) - if weak_model is False: - self.weak_model_name = None - else: - self.get_weak_model(weak_model) - - if editor_model is False: - self.editor_model_name = None - else: - self.get_editor_model(editor_model, editor_edit_format) - - def get_model_info(self, model): - return model_info_manager.get_model_info(model) - - def _copy_fields(self, source): - """Helper to copy fields from a ModelSettings instance to self""" - for field in fields(ModelSettings): - val = getattr(source, field.name) - setattr(self, field.name, val) - - # Handle backward compatibility: if remove_reasoning is set but reasoning_tag isn't, - # use remove_reasoning's value for reasoning_tag - if self.reasoning_tag is None and self.remove_reasoning is not None: - self.reasoning_tag = self.remove_reasoning - - def configure_model_settings(self, model): - # Look for exact model match - exact_match = False - for ms in MODEL_SETTINGS: - # direct match, or match "provider/" - if model == ms.name: - self._copy_fields(ms) - exact_match = True - break # Continue to apply overrides - - # Initialize accepts_settings if it's None - if self.accepts_settings is None: - self.accepts_settings = [] - - model = model.lower() - - # If no exact match, try generic settings - if not exact_match: - self.apply_generic_model_settings(model) - - # Apply override settings last if they exist - if ( - self.extra_model_settings - and self.extra_model_settings.extra_params - and self.extra_model_settings.name == "aider/extra_params" - ): - # Initialize extra_params if it doesn't exist - if not self.extra_params: - self.extra_params = {} - - # Deep merge the extra_params dicts - for key, value in self.extra_model_settings.extra_params.items(): - if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): - # For nested dicts, merge recursively - self.extra_params[key] = {**self.extra_params[key], **value} - else: - # For non-dict values, simply update - self.extra_params[key] = value - - # Ensure OpenRouter models accept thinking_tokens and reasoning_effort - if self.name.startswith("openrouter/"): - if self.accepts_settings is None: - self.accepts_settings = [] - if "thinking_tokens" not in self.accepts_settings: - self.accepts_settings.append("thinking_tokens") - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - - def apply_generic_model_settings(self, model): - if "/o3-mini" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.system_prompt_prefix = "Formatting re-enabled. " - self.system_prompt_prefix = "Formatting re-enabled. 
" - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "gpt-4.1-mini" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = False - return # <-- - - if "gpt-4.1" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = False - return # <-- - - last_segment = model.split("/")[-1] - if last_segment in ("gpt-5", "gpt-5-2025-08-07"): - self.use_temperature = False - self.edit_format = "diff" - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "/o1-mini" in model: - self.use_repo_map = True - self.use_temperature = False - self.use_system_prompt = False - return # <-- - - if "/o1-preview" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.use_system_prompt = False - return # <-- - - if "/o1" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.streaming = False - self.system_prompt_prefix = "Formatting re-enabled. " - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "deepseek" in model and "v3" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = True - return # <-- - - if "deepseek" in model and ("r1" in model or "reasoning" in model): - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.use_temperature = False - self.reasoning_tag = "think" - return # <-- - - if ("llama3" in model or "llama-3" in model) and "70b" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.send_undo_reply = True - self.examples_as_sys_msg = True - return # <-- - - if "gpt-4-turbo" in model or ("gpt-4-" in model and "-preview" in model): - self.edit_format = "udiff" - self.use_repo_map = True - self.send_undo_reply = True - return # <-- - - if "gpt-4" in model or "claude-3-opus" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.send_undo_reply = True - return # <-- - - if "gpt-3.5" in model or "gpt-4" in model: - self.reminder = "sys" - return # <-- - - if "3-7-sonnet" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.reminder = "user" - if "thinking_tokens" not in self.accepts_settings: - self.accepts_settings.append("thinking_tokens") - return # <-- - - if "3.5-sonnet" in model or "3-5-sonnet" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.reminder = "user" - return # <-- - - if model.startswith("o1-") or "/o1-" in model: - self.use_system_prompt = False - self.use_temperature = False - return # <-- - - if ( - "qwen" in model - and "coder" in model - and ("2.5" in model or "2-5" in model) - and "32b" in model - ): - self.edit_format = "diff" - self.editor_edit_format = "editor-diff" - self.use_repo_map = True - return # <-- - - if "qwq" in model and "32b" in model and "preview" not in model: - self.edit_format = "diff" - self.editor_edit_format = "editor-diff" - self.use_repo_map = True - self.reasoning_tag = "think" - self.examples_as_sys_msg = True - self.use_temperature = 0.6 - self.extra_params = dict(top_p=0.95) - return # <-- - - if "qwen3" in model: - self.edit_format = "diff" - self.use_repo_map = True - 
if "235b" in model: - self.system_prompt_prefix = "/no_think" - self.use_temperature = 0.7 - self.extra_params = {"top_p": 0.8, "top_k": 20, "min_p": 0.0} - else: - self.examples_as_sys_msg = True - self.use_temperature = 0.6 - self.reasoning_tag = "think" - self.extra_params = {"top_p": 0.95, "top_k": 20, "min_p": 0.0} - return # <-- - - # use the defaults - if self.edit_format == "diff": - self.use_repo_map = True - return # <-- - - def __str__(self): - return self.name - - def get_weak_model(self, provided_weak_model_name): - # If weak_model_name is provided, override the model settings - if provided_weak_model_name: - self.weak_model_name = provided_weak_model_name - - if not self.weak_model_name: - self.weak_model = self - return - - if self.weak_model_name == self.name: - self.weak_model = self - return - - self.weak_model = Model( - self.weak_model_name, - weak_model=False, - ) - return self.weak_model - - def commit_message_models(self): - return [self.weak_model, self] - - def get_editor_model(self, provided_editor_model_name, editor_edit_format): - # If editor_model_name is provided, override the model settings - if provided_editor_model_name: - self.editor_model_name = provided_editor_model_name - if editor_edit_format: - self.editor_edit_format = editor_edit_format - - if not self.editor_model_name or self.editor_model_name == self.name: - self.editor_model = self - else: - self.editor_model = Model( - self.editor_model_name, - editor_model=False, - ) - - if not self.editor_edit_format: - self.editor_edit_format = self.editor_model.edit_format - if self.editor_edit_format in ("diff", "whole", "diff-fenced"): - self.editor_edit_format = "editor-" + self.editor_edit_format - - return self.editor_model - - def tokenizer(self, text): - return litellm.encode(model=self.name, text=text) - - def token_count(self, messages): - if isinstance(messages, dict): - messages = [messages] - - if isinstance(messages, list): - try: - return litellm.token_counter(model=self.name, messages=messages) - except Exception: - pass # fall back to raw tokenizer - - if not self.tokenizer: - return 0 - - if isinstance(messages, str): - msgs = messages - else: - msgs = json.dumps(messages) - - try: - return len(self.tokenizer(msgs)) - except Exception as err: - print(f"Unable to count tokens with tokenizer: {err}") - return 0 - - def token_count_for_image(self, fname): - """ - Calculate the token cost for an image assuming high detail. - The token cost is determined by the size of the image. - :param fname: The filename of the image. - :return: The token cost for the image. - """ - width, height = self.get_image_size(fname) - - # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 - max_dimension = max(width, height) - if max_dimension > 2048: - scale_factor = 2048 / max_dimension - width = int(width * scale_factor) - height = int(height * scale_factor) - - # Scale the image such that the shortest side is 768 pixels long - min_dimension = min(width, height) - scale_factor = 768 / min_dimension - width = int(width * scale_factor) - height = int(height * scale_factor) - - # Calculate the number of 512x512 tiles needed to cover the image - tiles_width = math.ceil(width / 512) - tiles_height = math.ceil(height / 512) - num_tiles = tiles_width * tiles_height - - # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens - token_cost = num_tiles * 170 + 85 - return token_cost - - def get_image_size(self, fname): - """ - Retrieve the size of an image. 
- :param fname: The filename of the image. - :return: A tuple (width, height) representing the image size in pixels. - """ - with Image.open(fname) as img: - return img.size - - def fast_validate_environment(self): - """Fast path for common models. Avoids forcing litellm import.""" - - model = self.name - - pieces = model.split("/") - if len(pieces) > 1: - provider = pieces[0] - else: - provider = None - - keymap = dict( - openrouter="OPENROUTER_API_KEY", - openai="OPENAI_API_KEY", - deepseek="DEEPSEEK_API_KEY", - gemini="GEMINI_API_KEY", - anthropic="ANTHROPIC_API_KEY", - groq="GROQ_API_KEY", - fireworks_ai="FIREWORKS_API_KEY", - ) - var = None - if model in OPENAI_MODELS: - var = "OPENAI_API_KEY" - elif model in ANTHROPIC_MODELS: - var = "ANTHROPIC_API_KEY" - else: - var = keymap.get(provider) - - if var and os.environ.get(var): - return dict(keys_in_environment=[var], missing_keys=[]) - - def validate_environment(self): - res = self.fast_validate_environment() - if res: - return res - - # https://github.com/BerriAI/litellm/issues/3190 - - model = self.name - res = litellm.validate_environment(model) - - # If missing AWS credential keys but AWS_PROFILE is set, consider AWS credentials valid - if res["missing_keys"] and any( - key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] for key in res["missing_keys"] - ): - if model.startswith("bedrock/") or model.startswith("us.anthropic."): - if os.environ.get("AWS_PROFILE"): - res["missing_keys"] = [ - k - for k in res["missing_keys"] - if k not in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] - ] - if not res["missing_keys"]: - res["keys_in_environment"] = True - - if res["keys_in_environment"]: - return res - if res["missing_keys"]: - return res - - provider = self.info.get("litellm_provider", "").lower() - if provider == "cohere_chat": - return validate_variables(["COHERE_API_KEY"]) - if provider == "gemini": - return validate_variables(["GEMINI_API_KEY"]) - if provider == "groq": - return validate_variables(["GROQ_API_KEY"]) - - return res - - def get_repo_map_tokens(self): - map_tokens = 1024 - max_inp_tokens = self.info.get("max_input_tokens") - if max_inp_tokens: - map_tokens = max_inp_tokens / 8 - map_tokens = min(map_tokens, 4096) - map_tokens = max(map_tokens, 1024) - return map_tokens - - def set_reasoning_effort(self, effort): - """Set the reasoning effort parameter for models that support it""" - if effort is not None: - if self.name.startswith("openrouter/"): - if not self.extra_params: - self.extra_params = {} - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - self.extra_params["extra_body"]["reasoning"] = {"effort": effort} - else: - if not self.extra_params: - self.extra_params = {} - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - self.extra_params["extra_body"]["reasoning_effort"] = effort - - def parse_token_value(self, value): - """ - Parse a token value string into an integer. - Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. 
- - Args: - value: String or int token value - - Returns: - Integer token value - """ - if isinstance(value, int): - return value - - if not isinstance(value, str): - return int(value) # Try to convert to int - - value = value.strip().upper() - - if value.endswith("K"): - multiplier = 1024 - value = value[:-1] - elif value.endswith("M"): - multiplier = 1024 * 1024 - value = value[:-1] - else: - multiplier = 1 - - # Convert to float first to handle decimal values like "10.5k" - return int(float(value) * multiplier) - - def set_thinking_tokens(self, value): - """ - Set the thinking token budget for models that support it. - Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. - Pass "0" to disable thinking tokens. - """ - if value is not None: - num_tokens = self.parse_token_value(value) - self.use_temperature = False - if not self.extra_params: - self.extra_params = {} - - # OpenRouter models use 'reasoning' instead of 'thinking' - if self.name.startswith("openrouter/"): - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - if num_tokens > 0: - self.extra_params["extra_body"]["reasoning"] = {"max_tokens": num_tokens} - else: - if "reasoning" in self.extra_params["extra_body"]: - del self.extra_params["extra_body"]["reasoning"] - else: - if num_tokens > 0: - self.extra_params["thinking"] = {"type": "enabled", "budget_tokens": num_tokens} - else: - if "thinking" in self.extra_params: - del self.extra_params["thinking"] - - def get_raw_thinking_tokens(self): - """Get formatted thinking token budget if available""" - budget = None - - if self.extra_params: - # Check for OpenRouter reasoning format - if self.name.startswith("openrouter/"): - if ( - "extra_body" in self.extra_params - and "reasoning" in self.extra_params["extra_body"] - and "max_tokens" in self.extra_params["extra_body"]["reasoning"] - ): - budget = self.extra_params["extra_body"]["reasoning"]["max_tokens"] - # Check for standard thinking format - elif ( - "thinking" in self.extra_params and "budget_tokens" in self.extra_params["thinking"] - ): - budget = self.extra_params["thinking"]["budget_tokens"] - - return budget - - def get_thinking_tokens(self): - budget = self.get_raw_thinking_tokens() - - if budget is not None: - # Format as xx.yK for thousands, xx.yM for millions - if budget >= 1024 * 1024: - value = budget / (1024 * 1024) - if value == int(value): - return f"{int(value)}M" - else: - return f"{value:.1f}M" - else: - value = budget / 1024 - if value == int(value): - return f"{int(value)}k" - else: - return f"{value:.1f}k" - return None - - def get_reasoning_effort(self): - """Get reasoning effort value if available""" - if self.extra_params: - # Check for OpenRouter reasoning format - if self.name.startswith("openrouter/"): - if ( - "extra_body" in self.extra_params - and "reasoning" in self.extra_params["extra_body"] - and "effort" in self.extra_params["extra_body"]["reasoning"] - ): - return self.extra_params["extra_body"]["reasoning"]["effort"] - # Check for standard reasoning_effort format (e.g. 
in extra_body) - elif ( - "extra_body" in self.extra_params - and "reasoning_effort" in self.extra_params["extra_body"] - ): - return self.extra_params["extra_body"]["reasoning_effort"] - return None - - def is_deepseek(self): - name = self.name.lower() - if "deepseek" not in name: - return - return True - - def is_ollama(self): - return self.name.startswith("ollama/") or self.name.startswith("ollama_chat/") - - async def send_completion( - self, messages, functions, stream, temperature=None, tools=None, max_tokens=None - ): - if os.environ.get("AIDER_SANITY_CHECK_TURNS"): - sanity_check_messages(messages) - - messages = ensure_alternating_roles(messages) - - if self.verbose: - for message in messages: - msg_role = message.get("role") - msg_content = message.get("content") if message.get("content") else "" - msg_trunc = "" - - if message.get("content"): - msg_trunc = message.get("content")[:30] - - print(f"{msg_role} ({len(msg_content)}): {msg_trunc}") - - kwargs = dict(model=self.name, stream=stream) - - if self.use_temperature is not False: - if temperature is None: - if isinstance(self.use_temperature, bool): - temperature = 0 - else: - temperature = float(self.use_temperature) - - kwargs["temperature"] = temperature - - # `tools` is for modern tool usage. `functions` is for legacy/forced calls. - # This handles `base_coder` sending both with same content for `navigator_coder`. - effective_tools = tools - - if effective_tools is None and functions: - # Convert legacy `functions` to `tools` format if `tools` isn't provided. - effective_tools = [dict(type="function", function=f) for f in functions] - - if effective_tools: - kwargs["tools"] = effective_tools - - # Forcing a function call is for legacy style `functions` with a single function. - # This is used by ArchitectCoder and not intended for NavigatorCoder's tools. - if functions and len(functions) == 1: - function = functions[0] - - if "name" in function: - tool_name = function.get("name") - if tool_name: - kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_name}} - - if self.extra_params: - kwargs.update(self.extra_params) - - if max_tokens: - kwargs["max_tokens"] = max_tokens - - if "max_tokens" in kwargs and kwargs["max_tokens"]: - kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") - if self.is_ollama() and "num_ctx" not in kwargs: - num_ctx = int(self.token_count(messages) * 1.25) + 8192 - kwargs["num_ctx"] = num_ctx - - key = json.dumps(kwargs, sort_keys=True).encode() - # dump(kwargs) - - hash_object = hashlib.sha1(key) - if "timeout" not in kwargs: - kwargs["timeout"] = request_timeout - if self.verbose: - dump(kwargs) - kwargs["messages"] = messages - - # Are we using github copilot? 
- if "GITHUB_COPILOT_TOKEN" in os.environ or self.name.startswith("github_copilot/"): - if "extra_headers" not in kwargs: - kwargs["extra_headers"] = { - "Editor-Version": f"aider/{__version__}", - "Copilot-Integration-Id": "vscode-chat", - } - - try: - res = await litellm.acompletion(**kwargs) - except Exception as err: - print(f"LiteLLM API Error: {str(err)}") - res = self.model_error_response() - - if self.verbose: - print(f"LiteLLM API Error: {str(err)}") - raise - - return hash_object, res - - async def simple_send_with_retries(self, messages, max_tokens=None): - from aider.exceptions import LiteLLMExceptions - - litellm_ex = LiteLLMExceptions() - if "deepseek-reasoner" in self.name: - messages = ensure_alternating_roles(messages) - retry_delay = 0.125 - - if self.verbose: - dump(messages) - - while True: - try: - _hash, response = await self.send_completion( - messages=messages, - functions=None, - stream=False, - max_tokens=max_tokens, - ) - if not response or not hasattr(response, "choices") or not response.choices: - return None - res = response.choices[0].message.content - from aider.reasoning_tags import remove_reasoning_content - - return remove_reasoning_content(res, self.reasoning_tag) - - except litellm_ex.exceptions_tuple() as err: - ex_info = litellm_ex.get_ex_info(err) - print(str(err)) - if ex_info.description: - print(ex_info.description) - should_retry = ex_info.retry - if should_retry: - retry_delay *= 2 - if retry_delay > RETRY_TIMEOUT: - should_retry = False - if not should_retry: - return None - print(f"Retrying in {retry_delay:.1f} seconds...") - time.sleep(retry_delay) - continue - except AttributeError: - return None - - async def model_error_response(self): - for i in range(1): - await asyncio.sleep(0.1) - yield litellm.ModelResponse( - choices=[ - litellm.Choices( - finish_reason="stop", - index=0, - message=litellm.Message( - content="Model API Response Error. Please retry the previous request" - ), # Provide an empty message object - ) - ], - model=self.name, - ) - - -def register_models(model_settings_fnames): - files_loaded = [] - for model_settings_fname in model_settings_fnames: - if not os.path.exists(model_settings_fname): - continue - - if not Path(model_settings_fname).read_text().strip(): - continue - - try: - with open(model_settings_fname, "r") as model_settings_file: - model_settings_list = yaml.safe_load(model_settings_file) - - for model_settings_dict in model_settings_list: - model_settings = ModelSettings(**model_settings_dict) - - # Remove all existing settings for this model name - MODEL_SETTINGS[:] = [ms for ms in MODEL_SETTINGS if ms.name != model_settings.name] - # Add the new settings - MODEL_SETTINGS.append(model_settings) - except Exception as e: - raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") - files_loaded.append(model_settings_fname) - - return files_loaded - - -def register_litellm_models(model_fnames): - files_loaded = [] - for model_fname in model_fnames: - if not os.path.exists(model_fname): - continue - - try: - data = Path(model_fname).read_text() - if not data.strip(): - continue - model_def = json5.loads(data) - if not model_def: - continue - - # Defer registration with litellm to faster path. 
- model_info_manager.local_model_metadata.update(model_def) - except Exception as e: - raise Exception(f"Error loading model definition from {model_fname}: {e}") - - files_loaded.append(model_fname) - - return files_loaded - - -def validate_variables(vars): - missing = [] - for var in vars: - if var not in os.environ: - missing.append(var) - if missing: - return dict(keys_in_environment=False, missing_keys=missing) - return dict(keys_in_environment=True, missing_keys=missing) - - -def sanity_check_models(io, main_model): - problem_main = sanity_check_model(io, main_model) - - problem_weak = None - if main_model.weak_model and main_model.weak_model is not main_model: - problem_weak = sanity_check_model(io, main_model.weak_model) - - problem_editor = None - if ( - main_model.editor_model - and main_model.editor_model is not main_model - and main_model.editor_model is not main_model.weak_model - ): - problem_editor = sanity_check_model(io, main_model.editor_model) - - return problem_main or problem_weak or problem_editor - - -def sanity_check_model(io, model): - show = False - - if model.missing_keys: - show = True - io.tool_warning(f"Warning: {model} expects these environment variables") - for key in model.missing_keys: - value = os.environ.get(key, "") - status = "Set" if value else "Not set" - io.tool_output(f"- {key}: {status}") - - if platform.system() == "Windows": - io.tool_output( - "Note: You may need to restart your terminal or command prompt for `setx` to take" - " effect." - ) - - elif not model.keys_in_environment: - show = True - io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") - - # Check for model-specific dependencies - check_for_dependencies(io, model.name) - - if not model.info: - show = True - io.tool_warning( - f"Warning for {model}: Unknown context window size and costs, using sane defaults." - ) - - possible_matches = fuzzy_match_models(model.name) - if possible_matches: - io.tool_output("Did you mean one of these?") - for match in possible_matches: - io.tool_output(f"- {match}") - - return show - - -def check_for_dependencies(io, model_name): - """ - Check for model-specific dependencies and install them if needed. 
- - Args: - io: The IO object for user interaction - model_name: The name of the model to check dependencies for - """ - # Check if this is a Bedrock model and ensure boto3 is installed - if model_name.startswith("bedrock/"): - check_pip_install_extra( - io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] - ) - - # Check if this is a Vertex AI model and ensure google-cloud-aiplatform is installed - elif model_name.startswith("vertex_ai/"): - check_pip_install_extra( - io, - "google.cloud.aiplatform", - "Google Vertex AI models require the google-cloud-aiplatform package.", - ["google-cloud-aiplatform"], - ) - - -def fuzzy_match_models(name): - name = name.lower() - - chat_models = set() - model_metadata = list(litellm.model_cost.items()) - model_metadata += list(model_info_manager.local_model_metadata.items()) - - for orig_model, attrs in model_metadata: - model = orig_model.lower() - if attrs.get("mode") != "chat": - continue - provider = attrs.get("litellm_provider", "").lower() - if not provider: - continue - provider += "/" - - if model.startswith(provider): - fq_model = orig_model - else: - fq_model = provider + orig_model - - chat_models.add(fq_model) - chat_models.add(orig_model) - - chat_models = sorted(chat_models) - # exactly matching model - # matching_models = [ - # (fq,m) for fq,m in chat_models - # if name == fq or name == m - # ] - # if matching_models: - # return matching_models - - # Check for model names containing the name - matching_models = [m for m in chat_models if name in m] - if matching_models: - return sorted(set(matching_models)) - - # Check for slight misspellings - models = set(chat_models) - matching_models = difflib.get_close_matches(name, models, n=3, cutoff=0.8) - - return sorted(set(matching_models)) - - -def print_matching_models(io, search): - matches = fuzzy_match_models(search) - if matches: - io.tool_output(f'Models which match "{search}":') - for model in matches: - io.tool_output(f"- {model}") - else: - io.tool_output(f'No models match "{search}".') - - -def get_model_settings_as_yaml(): - from dataclasses import fields - - import yaml - - model_settings_list = [] - # Add default settings first with all field values - defaults = {} - for field in fields(ModelSettings): - defaults[field.name] = field.default - defaults["name"] = "(default values)" - model_settings_list.append(defaults) - - # Sort model settings by name - for ms in sorted(MODEL_SETTINGS, key=lambda x: x.name): - # Create dict with explicit field order - model_settings_dict = {} - for field in fields(ModelSettings): - value = getattr(ms, field.name) - if value != field.default: - model_settings_dict[field.name] = value - model_settings_list.append(model_settings_dict) - # Add blank line between entries - model_settings_list.append(None) - - # Filter out None values before dumping - yaml_str = yaml.dump( - [ms for ms in model_settings_list if ms is not None], - default_flow_style=False, - sort_keys=False, # Preserve field order from dataclass - ) - # Add actual blank lines between entries - return yaml_str.replace("\n- ", "\n\n- ") - - -def main(): - if len(sys.argv) < 2: - print("Usage: python models.py or python models.py --yaml") - sys.exit(1) - - if sys.argv[1] == "--yaml": - yaml_string = get_model_settings_as_yaml() - print(yaml_string) - else: - model_name = sys.argv[1] - matching_models = fuzzy_match_models(model_name) - - if matching_models: - print(f"Matching models for '{model_name}':") - for model in matching_models: - print(model) - else: - print(f"No 
matching models found for '{model_name}'.") - - -if __name__ == "__main__": - main() +import asyncio +import difflib +import hashlib +import importlib.resources +import json +import math +import os +import platform +import sys +import time +from dataclasses import dataclass, fields +from pathlib import Path +from typing import Optional, Union + +import json5 +import yaml +from PIL import Image + +from aider import __version__ +from aider.dump import dump # noqa: F401 +from aider.llm import litellm +from aider.openrouter import OpenRouterModelManager +from aider.sendchat import ensure_alternating_roles, sanity_check_messages +from aider.utils import check_pip_install_extra + +RETRY_TIMEOUT = 60 + +request_timeout = 600 + +DEFAULT_MODEL_NAME = "gpt-4o" +ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25" + +OPENAI_MODELS = """ +o1 +o1-preview +o1-mini +o3-mini +gpt-4 +gpt-4o +gpt-4o-2024-05-13 +gpt-4-turbo-preview +gpt-4-0314 +gpt-4-0613 +gpt-4-32k +gpt-4-32k-0314 +gpt-4-32k-0613 +gpt-4-turbo +gpt-4-turbo-2024-04-09 +gpt-4-1106-preview +gpt-4-0125-preview +gpt-4-vision-preview +gpt-4-1106-vision-preview +gpt-4o-mini +gpt-4o-mini-2024-07-18 +gpt-3.5-turbo +gpt-3.5-turbo-0301 +gpt-3.5-turbo-0613 +gpt-3.5-turbo-1106 +gpt-3.5-turbo-0125 +gpt-3.5-turbo-16k +gpt-3.5-turbo-16k-0613 +""" + +OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()] + +ANTHROPIC_MODELS = """ +claude-2 +claude-2.1 +claude-3-haiku-20240307 +claude-3-5-haiku-20241022 +claude-3-opus-20240229 +claude-3-sonnet-20240229 +claude-3-5-sonnet-20240620 +claude-3-5-sonnet-20241022 +claude-sonnet-4-20250514 +claude-opus-4-20250514 +""" + +ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()] + +# Mapping of model aliases to their canonical names +MODEL_ALIASES = { + # Claude models + "sonnet": "anthropic/claude-sonnet-4-20250514", + "haiku": "claude-3-5-haiku-20241022", + "opus": "claude-opus-4-20250514", + # GPT models + "4": "gpt-4-0613", + "4o": "gpt-4o", + "4-turbo": "gpt-4-1106-preview", + "35turbo": "gpt-3.5-turbo", + "35-turbo": "gpt-3.5-turbo", + "3": "gpt-3.5-turbo", + # Other models + "deepseek": "deepseek/deepseek-chat", + "flash": "gemini/gemini-2.5-flash", + "flash-lite": "gemini/gemini-2.5-flash-lite", + "quasar": "openrouter/openrouter/quasar-alpha", + "r1": "deepseek/deepseek-reasoner", + "gemini-2.5-pro": "gemini/gemini-2.5-pro", + "gemini": "gemini/gemini-2.5-pro", + "gemini-exp": "gemini/gemini-2.5-pro-exp-03-25", + "grok3": "xai/grok-3-beta", + "optimus": "openrouter/openrouter/optimus-alpha", +} +# Model metadata loaded from resources and user's files. 
+ + +@dataclass +class ModelSettings: + # Model class needs to have each of these as well + name: str + edit_format: str = "whole" + weak_model_name: Optional[str] = None + use_repo_map: bool = False + send_undo_reply: bool = False + lazy: bool = False + overeager: bool = False + reminder: str = "user" + examples_as_sys_msg: bool = False + extra_params: Optional[dict] = None + cache_control: bool = False + caches_by_default: bool = False + use_system_prompt: bool = True + use_temperature: Union[bool, float] = True + streaming: bool = True + editor_model_name: Optional[str] = None + editor_edit_format: Optional[str] = None + reasoning_tag: Optional[str] = None + remove_reasoning: Optional[str] = None # Deprecated alias for reasoning_tag + system_prompt_prefix: Optional[str] = None + accepts_settings: Optional[list] = None + + +# Load model settings from package resource +MODEL_SETTINGS = [] +with importlib.resources.open_text("aider.resources", "model-settings.yml") as f: + model_settings_list = yaml.safe_load(f) + for model_settings_dict in model_settings_list: + MODEL_SETTINGS.append(ModelSettings(**model_settings_dict)) + + +class ModelInfoManager: + MODEL_INFO_URL = ( + "https://raw.githubusercontent.com/BerriAI/litellm/main/" + "model_prices_and_context_window.json" + ) + CACHE_TTL = 60 * 60 * 24 # 24 hours + + def __init__(self): + self.cache_dir = Path.home() / ".aider" / "caches" + self.cache_file = self.cache_dir / "model_prices_and_context_window.json" + self.content = None + self.local_model_metadata = {} + self.verify_ssl = True + self._cache_loaded = False + + # Manager for the cached OpenRouter model database + self.openrouter_manager = OpenRouterModelManager() + + def set_verify_ssl(self, verify_ssl): + self.verify_ssl = verify_ssl + if hasattr(self, "openrouter_manager"): + self.openrouter_manager.set_verify_ssl(verify_ssl) + + def _load_cache(self): + if self._cache_loaded: + return + + try: + self.cache_dir.mkdir(parents=True, exist_ok=True) + if self.cache_file.exists(): + cache_age = time.time() - self.cache_file.stat().st_mtime + if cache_age < self.CACHE_TTL: + try: + self.content = json.loads(self.cache_file.read_text()) + except json.JSONDecodeError: + # If the cache file is corrupted, treat it as missing + self.content = None + except OSError: + pass + + self._cache_loaded = True + + def _update_cache(self): + try: + import requests + + # Respect the --no-verify-ssl switch + response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl) + if response.status_code == 200: + self.content = response.json() + try: + self.cache_file.write_text(json.dumps(self.content, indent=4)) + except OSError: + pass + except Exception as ex: + print(str(ex)) + try: + # Save empty dict to cache file on failure + self.cache_file.write_text("{}") + except OSError: + pass + + def get_model_from_cached_json_db(self, model): + data = self.local_model_metadata.get(model) + if data: + return data + + # Ensure cache is loaded before checking content + self._load_cache() + + if not self.content: + self._update_cache() + + if not self.content: + return dict() + + info = self.content.get(model, dict()) + if info: + return info + + pieces = model.split("/") + if len(pieces) == 2: + info = self.content.get(pieces[1]) + if info and info.get("litellm_provider") == pieces[0]: + return info + + return dict() + + def get_model_info(self, model): + cached_info = self.get_model_from_cached_json_db(model) + + litellm_info = None + if litellm._lazy_module or not cached_info: + try: + 
litellm_info = litellm.get_model_info(model) + except Exception as ex: + if "model_prices_and_context_window.json" not in str(ex): + print(str(ex)) + + if litellm_info: + return litellm_info + + if not cached_info and model.startswith("openrouter/"): + # First try using the locally cached OpenRouter model database + openrouter_info = self.openrouter_manager.get_model_info(model) + if openrouter_info: + return openrouter_info + + # Fallback to legacy web-scraping if the API cache does not contain the model + openrouter_info = self.fetch_openrouter_model_info(model) + if openrouter_info: + return openrouter_info + + return cached_info + + def fetch_openrouter_model_info(self, model): + """ + Fetch model info by scraping the openrouter model page. + Expected URL: https://openrouter.ai/ + Example: openrouter/qwen/qwen-2.5-72b-instruct:free + Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, + input_cost_per_token, output_cost_per_token. + """ + url_part = model[len("openrouter/") :] + url = "https://openrouter.ai/" + url_part + try: + import requests + + response = requests.get(url, timeout=5, verify=self.verify_ssl) + if response.status_code != 200: + return {} + html = response.text + import re + + if re.search( + rf"The model\s*.*{re.escape(url_part)}.* is not available", html, re.IGNORECASE + ): + print(f"\033[91mError: Model '{url_part}' is not available\033[0m") + return {} + text = re.sub(r"<[^>]+>", " ", html) + context_match = re.search(r"([\d,]+)\s*context", text) + if context_match: + context_str = context_match.group(1).replace(",", "") + context_size = int(context_str) + else: + context_size = None + input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE) + output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE) + input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None + output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None + if context_size is None or input_cost is None or output_cost is None: + return {} + params = { + "max_input_tokens": context_size, + "max_tokens": context_size, + "max_output_tokens": context_size, + "input_cost_per_token": input_cost, + "output_cost_per_token": output_cost, + } + return params + except Exception as e: + print("Error fetching openrouter info:", str(e)) + return {} + + +model_info_manager = ModelInfoManager() + + +class Model(ModelSettings): + def __init__( + self, model, weak_model=None, editor_model=None, editor_edit_format=None, verbose=False + ): + # Map any alias to its canonical name + model = MODEL_ALIASES.get(model, model) + + self.name = model + self.verbose = verbose + + self.max_chat_history_tokens = 1024 + self.weak_model = None + self.editor_model = None + + # Find the extra settings + self.extra_model_settings = next( + (ms for ms in MODEL_SETTINGS if ms.name == "aider/extra_params"), None + ) + + self.info = self.get_model_info(model) + + # Are all needed keys/params available? 
+ res = self.validate_environment() + self.missing_keys = res.get("missing_keys") + self.keys_in_environment = res.get("keys_in_environment") + + max_input_tokens = self.info.get("max_input_tokens") or 0 + # Calculate max_chat_history_tokens as 1/16th of max_input_tokens, + # with minimum 1k and maximum 8k + self.max_chat_history_tokens = min(max(max_input_tokens / 16, 1024), 8192) + + self.configure_model_settings(model) + if weak_model is False: + self.weak_model_name = None + else: + self.get_weak_model(weak_model) + + if editor_model is False: + self.editor_model_name = None + else: + self.get_editor_model(editor_model, editor_edit_format) + + def get_model_info(self, model): + return model_info_manager.get_model_info(model) + + def _copy_fields(self, source): + """Helper to copy fields from a ModelSettings instance to self""" + for field in fields(ModelSettings): + val = getattr(source, field.name) + setattr(self, field.name, val) + + # Handle backward compatibility: if remove_reasoning is set but reasoning_tag isn't, + # use remove_reasoning's value for reasoning_tag + if self.reasoning_tag is None and self.remove_reasoning is not None: + self.reasoning_tag = self.remove_reasoning + + def configure_model_settings(self, model): + # Look for exact model match + exact_match = False + for ms in MODEL_SETTINGS: + # direct match, or match "provider/" + if model == ms.name: + self._copy_fields(ms) + exact_match = True + break # Continue to apply overrides + + # Initialize accepts_settings if it's None + if self.accepts_settings is None: + self.accepts_settings = [] + + model = model.lower() + + # If no exact match, try generic settings + if not exact_match: + self.apply_generic_model_settings(model) + + # Apply override settings last if they exist + if ( + self.extra_model_settings + and self.extra_model_settings.extra_params + and self.extra_model_settings.name == "aider/extra_params" + ): + # Initialize extra_params if it doesn't exist + if not self.extra_params: + self.extra_params = {} + + # Deep merge the extra_params dicts + for key, value in self.extra_model_settings.extra_params.items(): + if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): + # For nested dicts, merge recursively + self.extra_params[key] = {**self.extra_params[key], **value} + else: + # For non-dict values, simply update + self.extra_params[key] = value + + # Ensure OpenRouter models accept thinking_tokens and reasoning_effort + if self.name.startswith("openrouter/"): + if self.accepts_settings is None: + self.accepts_settings = [] + if "thinking_tokens" not in self.accepts_settings: + self.accepts_settings.append("thinking_tokens") + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + + def apply_generic_model_settings(self, model): + if "/o3-mini" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.system_prompt_prefix = "Formatting re-enabled. " + self.system_prompt_prefix = "Formatting re-enabled. 
" + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "gpt-4.1-mini" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = False + return # <-- + + if "gpt-4.1" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = False + return # <-- + + last_segment = model.split("/")[-1] + if last_segment in ("gpt-5", "gpt-5-2025-08-07"): + self.use_temperature = False + self.edit_format = "diff" + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "/o1-mini" in model: + self.use_repo_map = True + self.use_temperature = False + self.use_system_prompt = False + return # <-- + + if "/o1-preview" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.use_system_prompt = False + return # <-- + + if "/o1" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.streaming = False + self.system_prompt_prefix = "Formatting re-enabled. " + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "deepseek" in model and "v3" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = True + return # <-- + + if "deepseek" in model and ("r1" in model or "reasoning" in model): + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.use_temperature = False + self.reasoning_tag = "think" + return # <-- + + if ("llama3" in model or "llama-3" in model) and "70b" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.send_undo_reply = True + self.examples_as_sys_msg = True + return # <-- + + if "gpt-4-turbo" in model or ("gpt-4-" in model and "-preview" in model): + self.edit_format = "udiff" + self.use_repo_map = True + self.send_undo_reply = True + return # <-- + + if "gpt-4" in model or "claude-3-opus" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.send_undo_reply = True + return # <-- + + if "gpt-3.5" in model or "gpt-4" in model: + self.reminder = "sys" + return # <-- + + if "3-7-sonnet" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.reminder = "user" + if "thinking_tokens" not in self.accepts_settings: + self.accepts_settings.append("thinking_tokens") + return # <-- + + if "3.5-sonnet" in model or "3-5-sonnet" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.reminder = "user" + return # <-- + + if model.startswith("o1-") or "/o1-" in model: + self.use_system_prompt = False + self.use_temperature = False + return # <-- + + if ( + "qwen" in model + and "coder" in model + and ("2.5" in model or "2-5" in model) + and "32b" in model + ): + self.edit_format = "diff" + self.editor_edit_format = "editor-diff" + self.use_repo_map = True + return # <-- + + if "qwq" in model and "32b" in model and "preview" not in model: + self.edit_format = "diff" + self.editor_edit_format = "editor-diff" + self.use_repo_map = True + self.reasoning_tag = "think" + self.examples_as_sys_msg = True + self.use_temperature = 0.6 + self.extra_params = dict(top_p=0.95) + return # <-- + + if "qwen3" in model: + self.edit_format = "diff" + self.use_repo_map = True + 
if "235b" in model: + self.system_prompt_prefix = "/no_think" + self.use_temperature = 0.7 + self.extra_params = {"top_p": 0.8, "top_k": 20, "min_p": 0.0} + else: + self.examples_as_sys_msg = True + self.use_temperature = 0.6 + self.reasoning_tag = "think" + self.extra_params = {"top_p": 0.95, "top_k": 20, "min_p": 0.0} + return # <-- + + # use the defaults + if self.edit_format == "diff": + self.use_repo_map = True + return # <-- + + def __str__(self): + return self.name + + def get_weak_model(self, provided_weak_model_name): + # If weak_model_name is provided, override the model settings + if provided_weak_model_name: + self.weak_model_name = provided_weak_model_name + + if not self.weak_model_name: + self.weak_model = self + return + + if self.weak_model_name == self.name: + self.weak_model = self + return + + self.weak_model = Model( + self.weak_model_name, + weak_model=False, + ) + return self.weak_model + + def commit_message_models(self): + return [self.weak_model, self] + + def get_editor_model(self, provided_editor_model_name, editor_edit_format): + # If editor_model_name is provided, override the model settings + if provided_editor_model_name: + self.editor_model_name = provided_editor_model_name + if editor_edit_format: + self.editor_edit_format = editor_edit_format + + if not self.editor_model_name or self.editor_model_name == self.name: + self.editor_model = self + else: + self.editor_model = Model( + self.editor_model_name, + editor_model=False, + ) + + if not self.editor_edit_format: + self.editor_edit_format = self.editor_model.edit_format + if self.editor_edit_format in ("diff", "whole", "diff-fenced"): + self.editor_edit_format = "editor-" + self.editor_edit_format + + return self.editor_model + + def tokenizer(self, text): + return litellm.encode(model=self.name, text=text) + + def token_count(self, messages): + if isinstance(messages, dict): + messages = [messages] + + if isinstance(messages, list): + try: + return litellm.token_counter(model=self.name, messages=messages) + except Exception: + pass # fall back to raw tokenizer + + if not self.tokenizer: + return 0 + + if isinstance(messages, str): + msgs = messages + else: + msgs = json.dumps(messages) + + try: + return len(self.tokenizer(msgs)) + except Exception as err: + print(f"Unable to count tokens with tokenizer: {err}") + return 0 + + def token_count_for_image(self, fname): + """ + Calculate the token cost for an image assuming high detail. + The token cost is determined by the size of the image. + :param fname: The filename of the image. + :return: The token cost for the image. + """ + width, height = self.get_image_size(fname) + + # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 + max_dimension = max(width, height) + if max_dimension > 2048: + scale_factor = 2048 / max_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Scale the image such that the shortest side is 768 pixels long + min_dimension = min(width, height) + scale_factor = 768 / min_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Calculate the number of 512x512 tiles needed to cover the image + tiles_width = math.ceil(width / 512) + tiles_height = math.ceil(height / 512) + num_tiles = tiles_width * tiles_height + + # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens + token_cost = num_tiles * 170 + 85 + return token_cost + + def get_image_size(self, fname): + """ + Retrieve the size of an image. 
+ :param fname: The filename of the image. + :return: A tuple (width, height) representing the image size in pixels. + """ + with Image.open(fname) as img: + return img.size + + def fast_validate_environment(self): + """Fast path for common models. Avoids forcing litellm import.""" + + model = self.name + + pieces = model.split("/") + if len(pieces) > 1: + provider = pieces[0] + else: + provider = None + + keymap = dict( + openrouter="OPENROUTER_API_KEY", + openai="OPENAI_API_KEY", + deepseek="DEEPSEEK_API_KEY", + gemini="GEMINI_API_KEY", + anthropic="ANTHROPIC_API_KEY", + groq="GROQ_API_KEY", + fireworks_ai="FIREWORKS_API_KEY", + ) + var = None + if model in OPENAI_MODELS: + var = "OPENAI_API_KEY" + elif model in ANTHROPIC_MODELS: + var = "ANTHROPIC_API_KEY" + else: + var = keymap.get(provider) + + if var and os.environ.get(var): + return dict(keys_in_environment=[var], missing_keys=[]) + + def validate_environment(self): + res = self.fast_validate_environment() + if res: + return res + + # https://github.com/BerriAI/litellm/issues/3190 + + model = self.name + res = litellm.validate_environment(model) + + # If missing AWS credential keys but AWS_PROFILE is set, consider AWS credentials valid + if res["missing_keys"] and any( + key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] for key in res["missing_keys"] + ): + if model.startswith("bedrock/") or model.startswith("us.anthropic."): + if os.environ.get("AWS_PROFILE"): + res["missing_keys"] = [ + k + for k in res["missing_keys"] + if k not in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] + ] + if not res["missing_keys"]: + res["keys_in_environment"] = True + + if res["keys_in_environment"]: + return res + if res["missing_keys"]: + return res + + provider = self.info.get("litellm_provider", "").lower() + if provider == "cohere_chat": + return validate_variables(["COHERE_API_KEY"]) + if provider == "gemini": + return validate_variables(["GEMINI_API_KEY"]) + if provider == "groq": + return validate_variables(["GROQ_API_KEY"]) + + return res + + def get_repo_map_tokens(self): + map_tokens = 1024 + max_inp_tokens = self.info.get("max_input_tokens") + if max_inp_tokens: + map_tokens = max_inp_tokens / 8 + map_tokens = min(map_tokens, 4096) + map_tokens = max(map_tokens, 1024) + return map_tokens + + def set_reasoning_effort(self, effort): + """Set the reasoning effort parameter for models that support it""" + if effort is not None: + if self.name.startswith("openrouter/"): + if not self.extra_params: + self.extra_params = {} + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + self.extra_params["extra_body"]["reasoning"] = {"effort": effort} + else: + if not self.extra_params: + self.extra_params = {} + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + self.extra_params["extra_body"]["reasoning_effort"] = effort + + def parse_token_value(self, value): + """ + Parse a token value string into an integer. + Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. 
+ + Args: + value: String or int token value + + Returns: + Integer token value + """ + if isinstance(value, int): + return value + + if not isinstance(value, str): + return int(value) # Try to convert to int + + value = value.strip().upper() + + if value.endswith("K"): + multiplier = 1024 + value = value[:-1] + elif value.endswith("M"): + multiplier = 1024 * 1024 + value = value[:-1] + else: + multiplier = 1 + + # Convert to float first to handle decimal values like "10.5k" + return int(float(value) * multiplier) + + def set_thinking_tokens(self, value): + """ + Set the thinking token budget for models that support it. + Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. + Pass "0" to disable thinking tokens. + """ + if value is not None: + num_tokens = self.parse_token_value(value) + self.use_temperature = False + if not self.extra_params: + self.extra_params = {} + + # OpenRouter models use 'reasoning' instead of 'thinking' + if self.name.startswith("openrouter/"): + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + if num_tokens > 0: + self.extra_params["extra_body"]["reasoning"] = {"max_tokens": num_tokens} + else: + if "reasoning" in self.extra_params["extra_body"]: + del self.extra_params["extra_body"]["reasoning"] + else: + if num_tokens > 0: + self.extra_params["thinking"] = {"type": "enabled", "budget_tokens": num_tokens} + else: + if "thinking" in self.extra_params: + del self.extra_params["thinking"] + + def get_raw_thinking_tokens(self): + """Get formatted thinking token budget if available""" + budget = None + + if self.extra_params: + # Check for OpenRouter reasoning format + if self.name.startswith("openrouter/"): + if ( + "extra_body" in self.extra_params + and "reasoning" in self.extra_params["extra_body"] + and "max_tokens" in self.extra_params["extra_body"]["reasoning"] + ): + budget = self.extra_params["extra_body"]["reasoning"]["max_tokens"] + # Check for standard thinking format + elif ( + "thinking" in self.extra_params and "budget_tokens" in self.extra_params["thinking"] + ): + budget = self.extra_params["thinking"]["budget_tokens"] + + return budget + + def get_thinking_tokens(self): + budget = self.get_raw_thinking_tokens() + + if budget is not None: + # Format as xx.yK for thousands, xx.yM for millions + if budget >= 1024 * 1024: + value = budget / (1024 * 1024) + if value == int(value): + return f"{int(value)}M" + else: + return f"{value:.1f}M" + else: + value = budget / 1024 + if value == int(value): + return f"{int(value)}k" + else: + return f"{value:.1f}k" + return None + + def get_reasoning_effort(self): + """Get reasoning effort value if available""" + if self.extra_params: + # Check for OpenRouter reasoning format + if self.name.startswith("openrouter/"): + if ( + "extra_body" in self.extra_params + and "reasoning" in self.extra_params["extra_body"] + and "effort" in self.extra_params["extra_body"]["reasoning"] + ): + return self.extra_params["extra_body"]["reasoning"]["effort"] + # Check for standard reasoning_effort format (e.g. 
in extra_body) + elif ( + "extra_body" in self.extra_params + and "reasoning_effort" in self.extra_params["extra_body"] + ): + return self.extra_params["extra_body"]["reasoning_effort"] + return None + + def is_deepseek(self): + name = self.name.lower() + if "deepseek" not in name: + return + return True + + def is_ollama(self): + return self.name.startswith("ollama/") or self.name.startswith("ollama_chat/") + + async def send_completion( + self, messages, functions, stream, temperature=None, tools=None, max_tokens=None + ): + if os.environ.get("AIDER_SANITY_CHECK_TURNS"): + sanity_check_messages(messages) + + messages = ensure_alternating_roles(messages) + + if self.verbose: + for message in messages: + msg_role = message.get("role") + msg_content = message.get("content") if message.get("content") else "" + msg_trunc = "" + + if message.get("content"): + msg_trunc = message.get("content")[:30] + + print(f"{msg_role} ({len(msg_content)}): {msg_trunc}") + + kwargs = dict(model=self.name, stream=stream) + + if self.use_temperature is not False: + if temperature is None: + if isinstance(self.use_temperature, bool): + temperature = 0 + else: + temperature = float(self.use_temperature) + + kwargs["temperature"] = temperature + + # `tools` is for modern tool usage. `functions` is for legacy/forced calls. + # This handles `base_coder` sending both with same content for `navigator_coder`. + effective_tools = [] + if tools: + effective_tools.extend(tools) + + if functions: + # Convert legacy `functions` to `tools` format and add them + effective_tools.extend([dict(type="function", function=f) for f in functions]) + + if effective_tools: + # Deduplicate tools based on function name + seen_tool_names = set() + deduped_tools = [] + for tool in effective_tools: + tool_name = tool.get("function", {}).get("name") + if tool_name and tool_name not in seen_tool_names: + deduped_tools.append(tool) + seen_tool_names.add(tool_name) + effective_tools = deduped_tools + kwargs["tools"] = effective_tools + + # Forcing a function call is for legacy style `functions` with a single function. + # This is used by ArchitectCoder and not intended for NavigatorCoder's tools. + if functions and len(functions) == 1: + function = functions[0] + + if "name" in function: + tool_name = function.get("name") + if tool_name: + kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_name}} + + if self.extra_params: + kwargs.update(self.extra_params) + + if max_tokens: + kwargs["max_tokens"] = max_tokens + + if "max_tokens" in kwargs and kwargs["max_tokens"]: + kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") + if self.is_ollama() and "num_ctx" not in kwargs: + num_ctx = int(self.token_count(messages) * 1.25) + 8192 + kwargs["num_ctx"] = num_ctx + + key = json.dumps(kwargs, sort_keys=True).encode() + # dump(kwargs) + + hash_object = hashlib.sha1(key) + if "timeout" not in kwargs: + kwargs["timeout"] = request_timeout + if self.verbose: + dump(kwargs) + kwargs["messages"] = messages + + # Are we using github copilot? 
+ if "GITHUB_COPILOT_TOKEN" in os.environ or self.name.startswith("github_copilot/"): + if "extra_headers" not in kwargs: + kwargs["extra_headers"] = { + "Editor-Version": f"aider/{__version__}", + "Copilot-Integration-Id": "vscode-chat", + } + + try: + res = await litellm.acompletion(**kwargs) + except Exception as err: + print(f"LiteLLM API Error: {str(err)}") + res = self.model_error_response() + + if self.verbose: + print(f"LiteLLM API Error: {str(err)}") + raise + + return hash_object, res + + async def simple_send_with_retries(self, messages, max_tokens=None): + from aider.exceptions import LiteLLMExceptions + + litellm_ex = LiteLLMExceptions() + if "deepseek-reasoner" in self.name: + messages = ensure_alternating_roles(messages) + retry_delay = 0.125 + + if self.verbose: + dump(messages) + + while True: + try: + _hash, response = await self.send_completion( + messages=messages, + functions=None, + stream=False, + max_tokens=max_tokens, + ) + if not response or not hasattr(response, "choices") or not response.choices: + return None + res = response.choices[0].message.content + from aider.reasoning_tags import remove_reasoning_content + + return remove_reasoning_content(res, self.reasoning_tag) + + except litellm_ex.exceptions_tuple() as err: + ex_info = litellm_ex.get_ex_info(err) + print(str(err)) + if ex_info.description: + print(ex_info.description) + should_retry = ex_info.retry + if should_retry: + retry_delay *= 2 + if retry_delay > RETRY_TIMEOUT: + should_retry = False + if not should_retry: + return None + print(f"Retrying in {retry_delay:.1f} seconds...") + time.sleep(retry_delay) + continue + except AttributeError: + return None + + async def model_error_response(self): + for i in range(1): + await asyncio.sleep(0.1) + yield litellm.ModelResponse( + choices=[ + litellm.Choices( + finish_reason="stop", + index=0, + message=litellm.Message( + content="Model API Response Error. Please retry the previous request" + ), # Provide an empty message object + ) + ], + model=self.name, + ) + + +def register_models(model_settings_fnames): + files_loaded = [] + for model_settings_fname in model_settings_fnames: + if not os.path.exists(model_settings_fname): + continue + + if not Path(model_settings_fname).read_text().strip(): + continue + + try: + with open(model_settings_fname, "r") as model_settings_file: + model_settings_list = yaml.safe_load(model_settings_file) + + for model_settings_dict in model_settings_list: + model_settings = ModelSettings(**model_settings_dict) + + # Remove all existing settings for this model name + MODEL_SETTINGS[:] = [ms for ms in MODEL_SETTINGS if ms.name != model_settings.name] + # Add the new settings + MODEL_SETTINGS.append(model_settings) + except Exception as e: + raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") + files_loaded.append(model_settings_fname) + + return files_loaded + + +def register_litellm_models(model_fnames): + files_loaded = [] + for model_fname in model_fnames: + if not os.path.exists(model_fname): + continue + + try: + data = Path(model_fname).read_text() + if not data.strip(): + continue + model_def = json5.loads(data) + if not model_def: + continue + + # Defer registration with litellm to faster path. 
+ model_info_manager.local_model_metadata.update(model_def) + except Exception as e: + raise Exception(f"Error loading model definition from {model_fname}: {e}") + + files_loaded.append(model_fname) + + return files_loaded + + +def validate_variables(vars): + missing = [] + for var in vars: + if var not in os.environ: + missing.append(var) + if missing: + return dict(keys_in_environment=False, missing_keys=missing) + return dict(keys_in_environment=True, missing_keys=missing) + + +def sanity_check_models(io, main_model): + problem_main = sanity_check_model(io, main_model) + + problem_weak = None + if main_model.weak_model and main_model.weak_model is not main_model: + problem_weak = sanity_check_model(io, main_model.weak_model) + + problem_editor = None + if ( + main_model.editor_model + and main_model.editor_model is not main_model + and main_model.editor_model is not main_model.weak_model + ): + problem_editor = sanity_check_model(io, main_model.editor_model) + + return problem_main or problem_weak or problem_editor + + +def sanity_check_model(io, model): + show = False + + if model.missing_keys: + show = True + io.tool_warning(f"Warning: {model} expects these environment variables") + for key in model.missing_keys: + value = os.environ.get(key, "") + status = "Set" if value else "Not set" + io.tool_output(f"- {key}: {status}") + + if platform.system() == "Windows": + io.tool_output( + "Note: You may need to restart your terminal or command prompt for `setx` to take" + " effect." + ) + + elif not model.keys_in_environment: + show = True + io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") + + # Check for model-specific dependencies + check_for_dependencies(io, model.name) + + if not model.info: + show = True + io.tool_warning( + f"Warning for {model}: Unknown context window size and costs, using sane defaults." + ) + + possible_matches = fuzzy_match_models(model.name) + if possible_matches: + io.tool_output("Did you mean one of these?") + for match in possible_matches: + io.tool_output(f"- {match}") + + return show + + +def check_for_dependencies(io, model_name): + """ + Check for model-specific dependencies and install them if needed. 
+ + Args: + io: The IO object for user interaction + model_name: The name of the model to check dependencies for + """ + # Check if this is a Bedrock model and ensure boto3 is installed + if model_name.startswith("bedrock/"): + check_pip_install_extra( + io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] + ) + + # Check if this is a Vertex AI model and ensure google-cloud-aiplatform is installed + elif model_name.startswith("vertex_ai/"): + check_pip_install_extra( + io, + "google.cloud.aiplatform", + "Google Vertex AI models require the google-cloud-aiplatform package.", + ["google-cloud-aiplatform"], + ) + + +def fuzzy_match_models(name): + name = name.lower() + + chat_models = set() + model_metadata = list(litellm.model_cost.items()) + model_metadata += list(model_info_manager.local_model_metadata.items()) + + for orig_model, attrs in model_metadata: + model = orig_model.lower() + if attrs.get("mode") != "chat": + continue + provider = attrs.get("litellm_provider", "").lower() + if not provider: + continue + provider += "/" + + if model.startswith(provider): + fq_model = orig_model + else: + fq_model = provider + orig_model + + chat_models.add(fq_model) + chat_models.add(orig_model) + + chat_models = sorted(chat_models) + # exactly matching model + # matching_models = [ + # (fq,m) for fq,m in chat_models + # if name == fq or name == m + # ] + # if matching_models: + # return matching_models + + # Check for model names containing the name + matching_models = [m for m in chat_models if name in m] + if matching_models: + return sorted(set(matching_models)) + + # Check for slight misspellings + models = set(chat_models) + matching_models = difflib.get_close_matches(name, models, n=3, cutoff=0.8) + + return sorted(set(matching_models)) + + +def print_matching_models(io, search): + matches = fuzzy_match_models(search) + if matches: + io.tool_output(f'Models which match "{search}":') + for model in matches: + io.tool_output(f"- {model}") + else: + io.tool_output(f'No models match "{search}".') + + +def get_model_settings_as_yaml(): + from dataclasses import fields + + import yaml + + model_settings_list = [] + # Add default settings first with all field values + defaults = {} + for field in fields(ModelSettings): + defaults[field.name] = field.default + defaults["name"] = "(default values)" + model_settings_list.append(defaults) + + # Sort model settings by name + for ms in sorted(MODEL_SETTINGS, key=lambda x: x.name): + # Create dict with explicit field order + model_settings_dict = {} + for field in fields(ModelSettings): + value = getattr(ms, field.name) + if value != field.default: + model_settings_dict[field.name] = value + model_settings_list.append(model_settings_dict) + # Add blank line between entries + model_settings_list.append(None) + + # Filter out None values before dumping + yaml_str = yaml.dump( + [ms for ms in model_settings_list if ms is not None], + default_flow_style=False, + sort_keys=False, # Preserve field order from dataclass + ) + # Add actual blank lines between entries + return yaml_str.replace("\n- ", "\n\n- ") + + +def main(): + if len(sys.argv) < 2: + print("Usage: python models.py or python models.py --yaml") + sys.exit(1) + + if sys.argv[1] == "--yaml": + yaml_string = get_model_settings_as_yaml() + print(yaml_string) + else: + model_name = sys.argv[1] + matching_models = fuzzy_match_models(model_name) + + if matching_models: + print(f"Matching models for '{model_name}':") + for model in matching_models: + print(model) + else: + print(f"No 
matching models found for '{model_name}'.") + + +if __name__ == "__main__": + main() From feb5e7a6dc45509379599deb71e46197415ba019 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 14:08:47 -0800 Subject: [PATCH 04/17] feat: Deduplicate tool declarations and add a test for it. Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro) --- tests/basic/test_models.py | 1252 +++++++++++++++++++----------------- 1 file changed, 648 insertions(+), 604 deletions(-) diff --git a/tests/basic/test_models.py b/tests/basic/test_models.py index 11e42b807af..145323e40cc 100644 --- a/tests/basic/test_models.py +++ b/tests/basic/test_models.py @@ -1,604 +1,648 @@ -import unittest -from unittest.mock import ANY, MagicMock, patch - -from aider.models import ( - ANTHROPIC_BETA_HEADER, - Model, - ModelInfoManager, - register_models, - sanity_check_model, - sanity_check_models, -) - - -class TestModels(unittest.TestCase): - def setUp(self): - """Reset MODEL_SETTINGS before each test""" - from aider.models import MODEL_SETTINGS - - self._original_settings = MODEL_SETTINGS.copy() - - def tearDown(self): - """Restore original MODEL_SETTINGS after each test""" - from aider.models import MODEL_SETTINGS - - MODEL_SETTINGS.clear() - MODEL_SETTINGS.extend(self._original_settings) - - def test_get_model_info_nonexistent(self): - manager = ModelInfoManager() - info = manager.get_model_info("non-existent-model") - self.assertEqual(info, {}) - - def test_max_context_tokens(self): - model = Model("gpt-3.5-turbo") - self.assertEqual(model.info["max_input_tokens"], 16385) - - model = Model("gpt-3.5-turbo-16k") - self.assertEqual(model.info["max_input_tokens"], 16385) - - model = Model("gpt-3.5-turbo-1106") - self.assertEqual(model.info["max_input_tokens"], 16385) - - model = Model("gpt-4") - self.assertEqual(model.info["max_input_tokens"], 8 * 1024) - - model = Model("gpt-4-32k") - self.assertEqual(model.info["max_input_tokens"], 32 * 1024) - - model = Model("gpt-4-0613") - self.assertEqual(model.info["max_input_tokens"], 8 * 1024) - - @patch("os.environ") - def test_sanity_check_model_all_set(self, mock_environ): - mock_environ.get.return_value = "dummy_value" - mock_io = MagicMock() - model = MagicMock() - model.name = "test-model" - model.missing_keys = ["API_KEY1", "API_KEY2"] - model.keys_in_environment = True - model.info = {"some": "info"} - - sanity_check_model(mock_io, model) - - mock_io.tool_output.assert_called() - calls = mock_io.tool_output.call_args_list - self.assertIn("- API_KEY1: Set", str(calls)) - self.assertIn("- API_KEY2: Set", str(calls)) - - @patch("os.environ") - def test_sanity_check_model_not_set(self, mock_environ): - mock_environ.get.return_value = "" - mock_io = MagicMock() - model = MagicMock() - model.name = "test-model" - model.missing_keys = ["API_KEY1", "API_KEY2"] - model.keys_in_environment = True - model.info = {"some": "info"} - - sanity_check_model(mock_io, model) - - mock_io.tool_output.assert_called() - calls = mock_io.tool_output.call_args_list - self.assertIn("- API_KEY1: Not set", str(calls)) - self.assertIn("- API_KEY2: Not set", str(calls)) - - def test_sanity_check_models_bogus_editor(self): - mock_io = MagicMock() - main_model = Model("gpt-4") - main_model.editor_model = Model("bogus-model") - - result = sanity_check_models(mock_io, main_model) - - self.assertTrue( - result - ) # Should return True because there's a problem with the editor model - mock_io.tool_warning.assert_called_with(ANY) # Ensure a warning was issued - - warning_messages = [ - 
warning_call.args[0] for warning_call in mock_io.tool_warning.call_args_list - ] - print("Warning messages:", warning_messages) # Add this line - - self.assertGreaterEqual(mock_io.tool_warning.call_count, 1) # Expect two warnings - self.assertTrue( - any("bogus-model" in msg for msg in warning_messages) - ) # Check that one of the warnings mentions the bogus model - - @patch("aider.models.check_for_dependencies") - def test_sanity_check_model_calls_check_dependencies(self, mock_check_deps): - """Test that sanity_check_model calls check_for_dependencies""" - mock_io = MagicMock() - model = MagicMock() - model.name = "test-model" - model.missing_keys = [] - model.keys_in_environment = True - model.info = {"some": "info"} - - sanity_check_model(mock_io, model) - - # Verify check_for_dependencies was called with the model name - mock_check_deps.assert_called_once_with(mock_io, "test-model") - - def test_model_aliases(self): - # Test common aliases - model = Model("4") - self.assertEqual(model.name, "gpt-4-0613") - - model = Model("4o") - self.assertEqual(model.name, "gpt-4o") - - model = Model("35turbo") - self.assertEqual(model.name, "gpt-3.5-turbo") - - model = Model("35-turbo") - self.assertEqual(model.name, "gpt-3.5-turbo") - - model = Model("3") - self.assertEqual(model.name, "gpt-3.5-turbo") - - model = Model("sonnet") - self.assertEqual(model.name, "anthropic/claude-sonnet-4-20250514") - - model = Model("haiku") - self.assertEqual(model.name, "claude-3-5-haiku-20241022") - - model = Model("opus") - self.assertEqual(model.name, "claude-opus-4-20250514") - - # Test non-alias passes through unchanged - model = Model("gpt-4") - self.assertEqual(model.name, "gpt-4") - - def test_o1_use_temp_false(self): - # Test GitHub Copilot models - model = Model("github/o1-mini") - self.assertEqual(model.name, "github/o1-mini") - self.assertEqual(model.use_temperature, False) - - model = Model("github/o1-preview") - self.assertEqual(model.name, "github/o1-preview") - self.assertEqual(model.use_temperature, False) - - def test_parse_token_value(self): - # Create a model instance to test the parse_token_value method - model = Model("gpt-4") - - # Test integer inputs - self.assertEqual(model.parse_token_value(8096), 8096) - self.assertEqual(model.parse_token_value(1000), 1000) - - # Test string inputs - self.assertEqual(model.parse_token_value("8096"), 8096) - - # Test k/K suffix (kilobytes) - self.assertEqual(model.parse_token_value("8k"), 8 * 1024) - self.assertEqual(model.parse_token_value("8K"), 8 * 1024) - self.assertEqual(model.parse_token_value("10.5k"), 10.5 * 1024) - self.assertEqual(model.parse_token_value("0.5K"), 0.5 * 1024) - - # Test m/M suffix (megabytes) - self.assertEqual(model.parse_token_value("1m"), 1 * 1024 * 1024) - self.assertEqual(model.parse_token_value("1M"), 1 * 1024 * 1024) - self.assertEqual(model.parse_token_value("0.5M"), 0.5 * 1024 * 1024) - - # Test with spaces - self.assertEqual(model.parse_token_value(" 8k "), 8 * 1024) - - # Test conversion from other types - self.assertEqual(model.parse_token_value(8.0), 8) - - def test_set_thinking_tokens(self): - # Test that set_thinking_tokens correctly sets the tokens with different formats - model = Model("gpt-4") - - # Test with integer - model.set_thinking_tokens(8096) - self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 8096) - self.assertFalse(model.use_temperature) - - # Test with string - model.set_thinking_tokens("10k") - self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 10 * 1024) - - # Test 
with decimal value - model.set_thinking_tokens("0.5M") - self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 0.5 * 1024 * 1024) - - @patch("aider.models.check_pip_install_extra") - def test_check_for_dependencies_bedrock(self, mock_check_pip): - """Test that check_for_dependencies calls check_pip_install_extra for Bedrock models""" - from aider.io import InputOutput - - io = InputOutput() - - # Test with a Bedrock model - from aider.models import check_for_dependencies - - check_for_dependencies(io, "bedrock/anthropic.claude-3-sonnet-20240229-v1:0") - - # Verify check_pip_install_extra was called with correct arguments - mock_check_pip.assert_called_once_with( - io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] - ) - - @patch("aider.models.check_pip_install_extra") - def test_check_for_dependencies_vertex_ai(self, mock_check_pip): - """Test that check_for_dependencies calls check_pip_install_extra for Vertex AI models""" - from aider.io import InputOutput - - io = InputOutput() - - # Test with a Vertex AI model - from aider.models import check_for_dependencies - - check_for_dependencies(io, "vertex_ai/gemini-1.5-pro") - - # Verify check_pip_install_extra was called with correct arguments - mock_check_pip.assert_called_once_with( - io, - "google.cloud.aiplatform", - "Google Vertex AI models require the google-cloud-aiplatform package.", - ["google-cloud-aiplatform"], - ) - - @patch("aider.models.check_pip_install_extra") - def test_check_for_dependencies_other_model(self, mock_check_pip): - """Test that check_for_dependencies doesn't call check_pip_install_extra for other models""" - from aider.io import InputOutput - - io = InputOutput() - - # Test with a non-Bedrock, non-Vertex AI model - from aider.models import check_for_dependencies - - check_for_dependencies(io, "gpt-4") - - # Verify check_pip_install_extra was not called - mock_check_pip.assert_not_called() - - def test_get_repo_map_tokens(self): - # Test default case (no max_input_tokens in info) - model = Model("gpt-4") - model.info = {} - self.assertEqual(model.get_repo_map_tokens(), 1024) - - # Test minimum boundary (max_input_tokens < 8192) - model.info = {"max_input_tokens": 4096} - self.assertEqual(model.get_repo_map_tokens(), 1024) - - # Test middle range (max_input_tokens = 16384) - model.info = {"max_input_tokens": 16384} - self.assertEqual(model.get_repo_map_tokens(), 2048) - - # Test maximum boundary (max_input_tokens > 32768) - model.info = {"max_input_tokens": 65536} - self.assertEqual(model.get_repo_map_tokens(), 4096) - - # Test exact boundary values - model.info = {"max_input_tokens": 8192} - self.assertEqual(model.get_repo_map_tokens(), 1024) - - model.info = {"max_input_tokens": 32768} - self.assertEqual(model.get_repo_map_tokens(), 4096) - - def test_configure_model_settings(self): - # Test o3-mini case - model = Model("something/o3-mini") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertFalse(model.use_temperature) - - # Test o1-mini case - model = Model("something/o1-mini") - self.assertTrue(model.use_repo_map) - self.assertFalse(model.use_temperature) - self.assertFalse(model.use_system_prompt) - - # Test o1-preview case - model = Model("something/o1-preview") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertFalse(model.use_temperature) - self.assertFalse(model.use_system_prompt) - - # Test o1 case - model = Model("something/o1") - self.assertEqual(model.edit_format, "diff") - 
self.assertTrue(model.use_repo_map) - self.assertFalse(model.use_temperature) - self.assertFalse(model.streaming) - - # Test deepseek v3 case - model = Model("deepseek-v3") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertEqual(model.reminder, "sys") - self.assertTrue(model.examples_as_sys_msg) - - # Test deepseek reasoner case - model = Model("deepseek-r1") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.examples_as_sys_msg) - self.assertFalse(model.use_temperature) - self.assertEqual(model.reasoning_tag, "think") - - # Test provider/deepseek-r1 case - model = Model("someprovider/deepseek-r1") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.examples_as_sys_msg) - self.assertFalse(model.use_temperature) - self.assertEqual(model.reasoning_tag, "think") - - # Test provider/deepseek-v3 case - model = Model("anotherprovider/deepseek-v3") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertEqual(model.reminder, "sys") - self.assertTrue(model.examples_as_sys_msg) - - # Test llama3 70b case - model = Model("llama3-70b") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.send_undo_reply) - self.assertTrue(model.examples_as_sys_msg) - - # Test gpt-4 case - model = Model("gpt-4") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.send_undo_reply) - - # Test gpt-3.5 case - model = Model("gpt-3.5") - self.assertEqual(model.reminder, "sys") - - # Test 3.5-sonnet case - model = Model("claude-3.5-sonnet") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.examples_as_sys_msg) - self.assertEqual(model.reminder, "user") - - # Test o1- prefix case - model = Model("o1-something") - self.assertFalse(model.use_system_prompt) - self.assertFalse(model.use_temperature) - - # Test qwen case - model = Model("qwen-coder-2.5-32b") - self.assertEqual(model.edit_format, "diff") - self.assertEqual(model.editor_edit_format, "editor-diff") - self.assertTrue(model.use_repo_map) - - def test_aider_extra_model_settings(self): - import tempfile - - import yaml - - # Create temporary YAML file with test settings - test_settings = [ - { - "name": "aider/extra_params", - "extra_params": { - "extra_headers": {"Foo": "bar"}, - "some_param": "some value", - }, - }, - ] - - # Write to a regular file instead of NamedTemporaryFile - # for better cross-platform compatibility - tmp = tempfile.mktemp(suffix=".yml") - try: - with open(tmp, "w") as f: - yaml.dump(test_settings, f) - - # Register the test settings - register_models([tmp]) - - # Test that defaults are applied when no exact match - model = Model("claude-3-5-sonnet-20240620") - # Test that both the override and existing headers are present - model = Model("claude-3-5-sonnet-20240620") - self.assertEqual(model.extra_params["extra_headers"]["Foo"], "bar") - self.assertEqual( - model.extra_params["extra_headers"]["anthropic-beta"], - ANTHROPIC_BETA_HEADER, - ) - self.assertEqual(model.extra_params["some_param"], "some value") - self.assertEqual(model.extra_params["max_tokens"], 8192) - - # Test that exact match overrides defaults but not overrides - model = Model("gpt-4") - self.assertEqual(model.extra_params["extra_headers"]["Foo"], "bar") - self.assertEqual(model.extra_params["some_param"], "some 
value") - finally: - # Clean up the temporary file - import os - - try: - os.unlink(tmp) - except OSError: - pass - - @patch("aider.models.litellm.acompletion") - @patch.object(Model, "token_count") - async def test_ollama_num_ctx_set_when_missing(self, mock_token_count, mock_completion): - mock_token_count.return_value = 1000 - - model = Model("ollama/llama3") - model.extra_params = {} - messages = [{"role": "user", "content": "Hello"}] - - await model.send_completion(messages, functions=None, stream=False) - - # Verify num_ctx was calculated and added to call - expected_ctx = int(1000 * 1.25) + 8192 # 9442 - mock_completion.assert_called_once_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - num_ctx=expected_ctx, - timeout=600, - ) - - @patch("aider.models.litellm.acompletion") - async def test_modern_tool_call_propagation(self, mock_completion): - # Test modern tool calling (used for MCP Server Tool Calls) - model = Model("gpt-4") - messages = [{"role": "user", "content": "Hello"}] - - await model.send_completion( - messages, functions=None, stream=False, tools=[dict(type="function", function="test")] - ) - - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - tools=[dict(type="function", function="test")], - temperature=0, - timeout=600, - ) - - @patch("aider.models.litellm.acompletion") - async def test_legacy_tool_call_propagation(self, mock_completion): - # Test modern tool calling (used for legacy server tool calling) - model = Model("gpt-4") - messages = [{"role": "user", "content": "Hello"}] - - await model.send_completion(messages, functions=["test"], stream=False) - - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - tools=[dict(type="function", function="test")], - temperature=0, - timeout=600, - ) - - @patch("aider.models.litellm.acompletion") - async def test_ollama_uses_existing_num_ctx(self, mock_completion): - model = Model("ollama/llama3") - model.extra_params = {"num_ctx": 4096} - - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - - # Should use provided num_ctx from extra_params - mock_completion.assert_called_once_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - num_ctx=4096, - timeout=600, - ) - - @patch("aider.models.litellm.acompletion") - async def test_non_ollama_no_num_ctx(self, mock_completion): - model = Model("gpt-4") - model.extra_params = {} - messages = [{"role": "user", "content": "Hello"}] - - await model.send_completion(messages, functions=None, stream=False) - - # Regular models shouldn't get num_ctx - mock_completion.assert_called_once_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - timeout=600, - ) - self.assertNotIn("num_ctx", mock_completion.call_args.kwargs) - - def test_use_temperature_settings(self): - # Test use_temperature=True (default) uses temperature=0 - model = Model("gpt-4") - self.assertTrue(model.use_temperature) - self.assertEqual(model.use_temperature, True) - - # Test use_temperature=False doesn't pass temperature - model = Model("github/o1-mini") - self.assertFalse(model.use_temperature) - - # Test use_temperature as float value - model = Model("gpt-4") - model.use_temperature = 0.7 - self.assertEqual(model.use_temperature, 0.7) - - @patch("aider.models.litellm.acompletion") - async def test_request_timeout_default(self, mock_completion): - # Test default timeout is used when 
not specified in extra_params - model = Model("gpt-4") - model.extra_params = {} - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - timeout=600, # Default timeout - ) - - @patch("aider.models.litellm.acompletion") - async def test_request_timeout_from_extra_params(self, mock_completion): - # Test timeout from extra_params overrides default - model = Model("gpt-4") - model.extra_params = {"timeout": 300} # 5 minutes - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - timeout=300, # From extra_params - ) - - @patch("aider.models.litellm.acompletion") - async def test_use_temperature_in_send_completion(self, mock_completion): - # Test use_temperature=True sends temperature=0 - model = Model("gpt-4") - model.extra_params = {} - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - timeout=600, - ) - - # Test use_temperature=False doesn't send temperature - model = Model("github/o1-mini") - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - self.assertNotIn("temperature", mock_completion.call_args.kwargs) - - # Test use_temperature as float sends that value - model = Model("gpt-4") - model.extra_params = {} - model.use_temperature = 0.7 - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - temperature=0.7, - timeout=600, - ) - - -if __name__ == "__main__": - unittest.main() +import unittest +from unittest.mock import ANY, MagicMock, patch + +from aider.models import ( + ANTHROPIC_BETA_HEADER, + Model, + ModelInfoManager, + register_models, + sanity_check_model, + sanity_check_models, +) + + +class TestModels(unittest.TestCase): + def setUp(self): + """Reset MODEL_SETTINGS before each test""" + from aider.models import MODEL_SETTINGS + + self._original_settings = MODEL_SETTINGS.copy() + + def tearDown(self): + """Restore original MODEL_SETTINGS after each test""" + from aider.models import MODEL_SETTINGS + + MODEL_SETTINGS.clear() + MODEL_SETTINGS.extend(self._original_settings) + + def test_get_model_info_nonexistent(self): + manager = ModelInfoManager() + info = manager.get_model_info("non-existent-model") + self.assertEqual(info, {}) + + def test_max_context_tokens(self): + model = Model("gpt-3.5-turbo") + self.assertEqual(model.info["max_input_tokens"], 16385) + + model = Model("gpt-3.5-turbo-16k") + self.assertEqual(model.info["max_input_tokens"], 16385) + + model = Model("gpt-3.5-turbo-1106") + self.assertEqual(model.info["max_input_tokens"], 16385) + + model = Model("gpt-4") + self.assertEqual(model.info["max_input_tokens"], 8 * 1024) + + model = Model("gpt-4-32k") + self.assertEqual(model.info["max_input_tokens"], 32 * 1024) + + model = Model("gpt-4-0613") + self.assertEqual(model.info["max_input_tokens"], 8 * 1024) + + @patch("os.environ") + def test_sanity_check_model_all_set(self, mock_environ): + 
mock_environ.get.return_value = "dummy_value" + mock_io = MagicMock() + model = MagicMock() + model.name = "test-model" + model.missing_keys = ["API_KEY1", "API_KEY2"] + model.keys_in_environment = True + model.info = {"some": "info"} + + sanity_check_model(mock_io, model) + + mock_io.tool_output.assert_called() + calls = mock_io.tool_output.call_args_list + self.assertIn("- API_KEY1: Set", str(calls)) + self.assertIn("- API_KEY2: Set", str(calls)) + + @patch("os.environ") + def test_sanity_check_model_not_set(self, mock_environ): + mock_environ.get.return_value = "" + mock_io = MagicMock() + model = MagicMock() + model.name = "test-model" + model.missing_keys = ["API_KEY1", "API_KEY2"] + model.keys_in_environment = True + model.info = {"some": "info"} + + sanity_check_model(mock_io, model) + + mock_io.tool_output.assert_called() + calls = mock_io.tool_output.call_args_list + self.assertIn("- API_KEY1: Not set", str(calls)) + self.assertIn("- API_KEY2: Not set", str(calls)) + + def test_sanity_check_models_bogus_editor(self): + mock_io = MagicMock() + main_model = Model("gpt-4") + main_model.editor_model = Model("bogus-model") + + result = sanity_check_models(mock_io, main_model) + + self.assertTrue( + result + ) # Should return True because there's a problem with the editor model + mock_io.tool_warning.assert_called_with(ANY) # Ensure a warning was issued + + warning_messages = [ + warning_call.args[0] for warning_call in mock_io.tool_warning.call_args_list + ] + print("Warning messages:", warning_messages) # Add this line + + self.assertGreaterEqual(mock_io.tool_warning.call_count, 1) # Expect two warnings + self.assertTrue( + any("bogus-model" in msg for msg in warning_messages) + ) # Check that one of the warnings mentions the bogus model + + @patch("aider.models.check_for_dependencies") + def test_sanity_check_model_calls_check_dependencies(self, mock_check_deps): + """Test that sanity_check_model calls check_for_dependencies""" + mock_io = MagicMock() + model = MagicMock() + model.name = "test-model" + model.missing_keys = [] + model.keys_in_environment = True + model.info = {"some": "info"} + + sanity_check_model(mock_io, model) + + # Verify check_for_dependencies was called with the model name + mock_check_deps.assert_called_once_with(mock_io, "test-model") + + def test_model_aliases(self): + # Test common aliases + model = Model("4") + self.assertEqual(model.name, "gpt-4-0613") + + model = Model("4o") + self.assertEqual(model.name, "gpt-4o") + + model = Model("35turbo") + self.assertEqual(model.name, "gpt-3.5-turbo") + + model = Model("35-turbo") + self.assertEqual(model.name, "gpt-3.5-turbo") + + model = Model("3") + self.assertEqual(model.name, "gpt-3.5-turbo") + + model = Model("sonnet") + self.assertEqual(model.name, "anthropic/claude-sonnet-4-20250514") + + model = Model("haiku") + self.assertEqual(model.name, "claude-3-5-haiku-20241022") + + model = Model("opus") + self.assertEqual(model.name, "claude-opus-4-20250514") + + # Test non-alias passes through unchanged + model = Model("gpt-4") + self.assertEqual(model.name, "gpt-4") + + def test_o1_use_temp_false(self): + # Test GitHub Copilot models + model = Model("github/o1-mini") + self.assertEqual(model.name, "github/o1-mini") + self.assertEqual(model.use_temperature, False) + + model = Model("github/o1-preview") + self.assertEqual(model.name, "github/o1-preview") + self.assertEqual(model.use_temperature, False) + + def test_parse_token_value(self): + # Create a model instance to test the parse_token_value method + model = 
Model("gpt-4") + + # Test integer inputs + self.assertEqual(model.parse_token_value(8096), 8096) + self.assertEqual(model.parse_token_value(1000), 1000) + + # Test string inputs + self.assertEqual(model.parse_token_value("8096"), 8096) + + # Test k/K suffix (kilobytes) + self.assertEqual(model.parse_token_value("8k"), 8 * 1024) + self.assertEqual(model.parse_token_value("8K"), 8 * 1024) + self.assertEqual(model.parse_token_value("10.5k"), 10.5 * 1024) + self.assertEqual(model.parse_token_value("0.5K"), 0.5 * 1024) + + # Test m/M suffix (megabytes) + self.assertEqual(model.parse_token_value("1m"), 1 * 1024 * 1024) + self.assertEqual(model.parse_token_value("1M"), 1 * 1024 * 1024) + self.assertEqual(model.parse_token_value("0.5M"), 0.5 * 1024 * 1024) + + # Test with spaces + self.assertEqual(model.parse_token_value(" 8k "), 8 * 1024) + + # Test conversion from other types + self.assertEqual(model.parse_token_value(8.0), 8) + + def test_set_thinking_tokens(self): + # Test that set_thinking_tokens correctly sets the tokens with different formats + model = Model("gpt-4") + + # Test with integer + model.set_thinking_tokens(8096) + self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 8096) + self.assertFalse(model.use_temperature) + + # Test with string + model.set_thinking_tokens("10k") + self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 10 * 1024) + + # Test with decimal value + model.set_thinking_tokens("0.5M") + self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 0.5 * 1024 * 1024) + + @patch("aider.models.check_pip_install_extra") + def test_check_for_dependencies_bedrock(self, mock_check_pip): + """Test that check_for_dependencies calls check_pip_install_extra for Bedrock models""" + from aider.io import InputOutput + + io = InputOutput() + + # Test with a Bedrock model + from aider.models import check_for_dependencies + + check_for_dependencies(io, "bedrock/anthropic.claude-3-sonnet-20240229-v1:0") + + # Verify check_pip_install_extra was called with correct arguments + mock_check_pip.assert_called_once_with( + io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] + ) + + @patch("aider.models.check_pip_install_extra") + def test_check_for_dependencies_vertex_ai(self, mock_check_pip): + """Test that check_for_dependencies calls check_pip_install_extra for Vertex AI models""" + from aider.io import InputOutput + + io = InputOutput() + + # Test with a Vertex AI model + from aider.models import check_for_dependencies + + check_for_dependencies(io, "vertex_ai/gemini-1.5-pro") + + # Verify check_pip_install_extra was called with correct arguments + mock_check_pip.assert_called_once_with( + io, + "google.cloud.aiplatform", + "Google Vertex AI models require the google-cloud-aiplatform package.", + ["google-cloud-aiplatform"], + ) + + @patch("aider.models.check_pip_install_extra") + def test_check_for_dependencies_other_model(self, mock_check_pip): + """Test that check_for_dependencies doesn't call check_pip_install_extra for other models""" + from aider.io import InputOutput + + io = InputOutput() + + # Test with a non-Bedrock, non-Vertex AI model + from aider.models import check_for_dependencies + + check_for_dependencies(io, "gpt-4") + + # Verify check_pip_install_extra was not called + mock_check_pip.assert_not_called() + + def test_get_repo_map_tokens(self): + # Test default case (no max_input_tokens in info) + model = Model("gpt-4") + model.info = {} + self.assertEqual(model.get_repo_map_tokens(), 1024) + + # Test minimum boundary 
(max_input_tokens < 8192) + model.info = {"max_input_tokens": 4096} + self.assertEqual(model.get_repo_map_tokens(), 1024) + + # Test middle range (max_input_tokens = 16384) + model.info = {"max_input_tokens": 16384} + self.assertEqual(model.get_repo_map_tokens(), 2048) + + # Test maximum boundary (max_input_tokens > 32768) + model.info = {"max_input_tokens": 65536} + self.assertEqual(model.get_repo_map_tokens(), 4096) + + # Test exact boundary values + model.info = {"max_input_tokens": 8192} + self.assertEqual(model.get_repo_map_tokens(), 1024) + + model.info = {"max_input_tokens": 32768} + self.assertEqual(model.get_repo_map_tokens(), 4096) + + def test_configure_model_settings(self): + # Test o3-mini case + model = Model("something/o3-mini") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertFalse(model.use_temperature) + + # Test o1-mini case + model = Model("something/o1-mini") + self.assertTrue(model.use_repo_map) + self.assertFalse(model.use_temperature) + self.assertFalse(model.use_system_prompt) + + # Test o1-preview case + model = Model("something/o1-preview") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertFalse(model.use_temperature) + self.assertFalse(model.use_system_prompt) + + # Test o1 case + model = Model("something/o1") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertFalse(model.use_temperature) + self.assertFalse(model.streaming) + + # Test deepseek v3 case + model = Model("deepseek-v3") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertEqual(model.reminder, "sys") + self.assertTrue(model.examples_as_sys_msg) + + # Test deepseek reasoner case + model = Model("deepseek-r1") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.examples_as_sys_msg) + self.assertFalse(model.use_temperature) + self.assertEqual(model.reasoning_tag, "think") + + # Test provider/deepseek-r1 case + model = Model("someprovider/deepseek-r1") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.examples_as_sys_msg) + self.assertFalse(model.use_temperature) + self.assertEqual(model.reasoning_tag, "think") + + # Test provider/deepseek-v3 case + model = Model("anotherprovider/deepseek-v3") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertEqual(model.reminder, "sys") + self.assertTrue(model.examples_as_sys_msg) + + # Test llama3 70b case + model = Model("llama3-70b") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.send_undo_reply) + self.assertTrue(model.examples_as_sys_msg) + + # Test gpt-4 case + model = Model("gpt-4") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.send_undo_reply) + + # Test gpt-3.5 case + model = Model("gpt-3.5") + self.assertEqual(model.reminder, "sys") + + # Test 3.5-sonnet case + model = Model("claude-3.5-sonnet") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.examples_as_sys_msg) + self.assertEqual(model.reminder, "user") + + # Test o1- prefix case + model = Model("o1-something") + self.assertFalse(model.use_system_prompt) + self.assertFalse(model.use_temperature) + + # Test qwen case + model = Model("qwen-coder-2.5-32b") + 
self.assertEqual(model.edit_format, "diff") + self.assertEqual(model.editor_edit_format, "editor-diff") + self.assertTrue(model.use_repo_map) + + def test_aider_extra_model_settings(self): + import tempfile + + import yaml + + # Create temporary YAML file with test settings + test_settings = [ + { + "name": "aider/extra_params", + "extra_params": { + "extra_headers": {"Foo": "bar"}, + "some_param": "some value", + }, + }, + ] + + # Write to a regular file instead of NamedTemporaryFile + # for better cross-platform compatibility + tmp = tempfile.mktemp(suffix=".yml") + try: + with open(tmp, "w") as f: + yaml.dump(test_settings, f) + + # Register the test settings + register_models([tmp]) + + # Test that defaults are applied when no exact match + model = Model("claude-3-5-sonnet-20240620") + # Test that both the override and existing headers are present + model = Model("claude-3-5-sonnet-20240620") + self.assertEqual(model.extra_params["extra_headers"]["Foo"], "bar") + self.assertEqual( + model.extra_params["extra_headers"]["anthropic-beta"], + ANTHROPIC_BETA_HEADER, + ) + self.assertEqual(model.extra_params["some_param"], "some value") + self.assertEqual(model.extra_params["max_tokens"], 8192) + + # Test that exact match overrides defaults but not overrides + model = Model("gpt-4") + self.assertEqual(model.extra_params["extra_headers"]["Foo"], "bar") + self.assertEqual(model.extra_params["some_param"], "some value") + finally: + # Clean up the temporary file + import os + + try: + os.unlink(tmp) + except OSError: + pass + + @patch("aider.models.litellm.acompletion") + @patch.object(Model, "token_count") + async def test_ollama_num_ctx_set_when_missing(self, mock_token_count, mock_completion): + mock_token_count.return_value = 1000 + + model = Model("ollama/llama3") + model.extra_params = {} + messages = [{"role": "user", "content": "Hello"}] + + await model.send_completion(messages, functions=None, stream=False) + + # Verify num_ctx was calculated and added to call + expected_ctx = int(1000 * 1.25) + 8192 # 9442 + mock_completion.assert_called_once_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + num_ctx=expected_ctx, + timeout=600, + ) + + @patch("aider.models.litellm.acompletion") + async def test_modern_tool_call_propagation(self, mock_completion): + # Test modern tool calling (used for MCP Server Tool Calls) + model = Model("gpt-4") + messages = [{"role": "user", "content": "Hello"}] + + await model.send_completion( + messages, functions=None, stream=False, tools=[dict(type="function", function="test")] + ) + + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + tools=[dict(type="function", function="test")], + temperature=0, + timeout=600, + ) + + @patch("aider.models.litellm.acompletion") + async def test_legacy_tool_call_propagation(self, mock_completion): + # Test modern tool calling (used for legacy server tool calling) + model = Model("gpt-4") + messages = [{"role": "user", "content": "Hello"}] + + await model.send_completion(messages, functions=[{"name": "test"}], stream=False) + + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + tools=[dict(type="function", function={"name": "test"})], + tool_choice={"type": "function", "function": {"name": "test"}}, + temperature=0, + timeout=600, + ) + + @patch("aider.models.litellm.acompletion") + async def test_send_completion_deduplicates_tools(self, mock_completion): + # Test that send_completion correctly deduplicates 
tools and functions + model = Model("gpt-4") + messages = [{"role": "user", "content": "Hello"}] + + # Define tools and functions with duplicates + fetch_tool = { + "type": "function", + "function": {"name": "fetch", "description": "fetch url"}, + } + other_tool = { + "type": "function", + "function": {"name": "other", "description": "other tool"}, + } + + tools = [fetch_tool, other_tool, fetch_tool] # Duplicate 'fetch' + functions = [ + {"name": "fetch", "description": "fetch url"}, # Duplicate 'fetch' + {"name": "another", "description": "another tool"}, + ] + + await model.send_completion(messages, functions=functions, stream=False, tools=tools) + + # Verify that acompletion was called + mock_completion.assert_called_once() + + # Get the keyword arguments passed to acompletion + _, kwargs = mock_completion.call_args + + # Check that 'tools' is in the arguments + self.assertIn("tools", kwargs) + + # Check that the tools are deduplicated + final_tools = kwargs["tools"] + self.assertEqual(len(final_tools), 3) + + tool_names = {tool.get("function", {}).get("name") for tool in final_tools} + self.assertEqual(len(tool_names), 3) + self.assertIn("fetch", tool_names) + self.assertIn("other", tool_names) + self.assertIn("another", tool_names) + + @patch("aider.models.litellm.acompletion") + async def test_ollama_uses_existing_num_ctx(self, mock_completion): + model = Model("ollama/llama3") + model.extra_params = {"num_ctx": 4096} + + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + + # Should use provided num_ctx from extra_params + mock_completion.assert_called_once_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + num_ctx=4096, + timeout=600, + ) + + @patch("aider.models.litellm.acompletion") + async def test_non_ollama_no_num_ctx(self, mock_completion): + model = Model("gpt-4") + model.extra_params = {} + messages = [{"role": "user", "content": "Hello"}] + + await model.send_completion(messages, functions=None, stream=False) + + # Regular models shouldn't get num_ctx + mock_completion.assert_called_once_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + timeout=600, + ) + self.assertNotIn("num_ctx", mock_completion.call_args.kwargs) + + def test_use_temperature_settings(self): + # Test use_temperature=True (default) uses temperature=0 + model = Model("gpt-4") + self.assertTrue(model.use_temperature) + self.assertEqual(model.use_temperature, True) + + # Test use_temperature=False doesn't pass temperature + model = Model("github/o1-mini") + self.assertFalse(model.use_temperature) + + # Test use_temperature as float value + model = Model("gpt-4") + model.use_temperature = 0.7 + self.assertEqual(model.use_temperature, 0.7) + + @patch("aider.models.litellm.acompletion") + async def test_request_timeout_default(self, mock_completion): + # Test default timeout is used when not specified in extra_params + model = Model("gpt-4") + model.extra_params = {} + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + timeout=600, # Default timeout + ) + + @patch("aider.models.litellm.acompletion") + async def test_request_timeout_from_extra_params(self, mock_completion): + # Test timeout from extra_params overrides default + model = Model("gpt-4") + model.extra_params = {"timeout": 300} # 5 minutes 
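The timeout behavior pinned down by these two tests falls out of the kwargs assembly in send_completion later in this patch series: extra_params are merged into the call kwargs first, and the module-level request_timeout default of 600 seconds is applied only if no timeout key survived that merge. A minimal standalone sketch of that precedence (the helper name is illustrative):

    REQUEST_TIMEOUT = 600  # mirrors request_timeout in aider/models.py

    def effective_timeout(extra_params):
        kwargs = {}
        kwargs.update(extra_params or {})
        if "timeout" not in kwargs:
            kwargs["timeout"] = REQUEST_TIMEOUT
        return kwargs["timeout"]

    assert effective_timeout({}) == 600                 # default applies
    assert effective_timeout({"timeout": 300}) == 300   # extra_params wins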
+ messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + timeout=300, # From extra_params + ) + + @patch("aider.models.litellm.acompletion") + async def test_use_temperature_in_send_completion(self, mock_completion): + # Test use_temperature=True sends temperature=0 + model = Model("gpt-4") + model.extra_params = {} + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + timeout=600, + ) + + # Test use_temperature=False doesn't send temperature + model = Model("github/o1-mini") + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + self.assertNotIn("temperature", mock_completion.call_args.kwargs) + + # Test use_temperature as float sends that value + model = Model("gpt-4") + model.extra_params = {} + model.use_temperature = 0.7 + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + temperature=0.7, + timeout=600, + ) + + +if __name__ == "__main__": + unittest.main() From 6bab0c131329427546fa7e68edfecf0357f83acb Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 14:20:05 -0800 Subject: [PATCH 05/17] refactor: Simplify test_legacy_tool_call_propagation and remove dedupe test Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro) --- tests/basic/test_models.py | 48 ++------------------------------------ 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/tests/basic/test_models.py b/tests/basic/test_models.py index 145323e40cc..c2a1d6f5d98 100644 --- a/tests/basic/test_models.py +++ b/tests/basic/test_models.py @@ -472,61 +472,17 @@ async def test_legacy_tool_call_propagation(self, mock_completion): model = Model("gpt-4") messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=[{"name": "test"}], stream=False) + await model.send_completion(messages, functions=["test"], stream=False) mock_completion.assert_called_with( model=model.name, messages=messages, stream=False, - tools=[dict(type="function", function={"name": "test"})], - tool_choice={"type": "function", "function": {"name": "test"}}, + tools=[dict(type="function", function="test")], temperature=0, timeout=600, ) - @patch("aider.models.litellm.acompletion") - async def test_send_completion_deduplicates_tools(self, mock_completion): - # Test that send_completion correctly deduplicates tools and functions - model = Model("gpt-4") - messages = [{"role": "user", "content": "Hello"}] - - # Define tools and functions with duplicates - fetch_tool = { - "type": "function", - "function": {"name": "fetch", "description": "fetch url"}, - } - other_tool = { - "type": "function", - "function": {"name": "other", "description": "other tool"}, - } - - tools = [fetch_tool, other_tool, fetch_tool] # Duplicate 'fetch' - functions = [ - {"name": "fetch", "description": "fetch url"}, # Duplicate 'fetch' - {"name": "another", "description": "another tool"}, - ] - - await model.send_completion(messages, functions=functions, stream=False, tools=tools) - - # Verify that acompletion was called - 
mock_completion.assert_called_once() - - # Get the keyword arguments passed to acompletion - _, kwargs = mock_completion.call_args - - # Check that 'tools' is in the arguments - self.assertIn("tools", kwargs) - - # Check that the tools are deduplicated - final_tools = kwargs["tools"] - self.assertEqual(len(final_tools), 3) - - tool_names = {tool.get("function", {}).get("name") for tool in final_tools} - self.assertEqual(len(tool_names), 3) - self.assertIn("fetch", tool_names) - self.assertIn("other", tool_names) - self.assertIn("another", tool_names) - @patch("aider.models.litellm.acompletion") async def test_ollama_uses_existing_num_ctx(self, mock_completion): model = Model("ollama/llama3") From 6c6ac4bcea7a1c8eaf2f1032188a26af30aace6e Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 14:24:44 -0800 Subject: [PATCH 06/17] updated .gitignore --- .gitignore | 69 +++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index b34f19f0644..27db8ef6ecc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,34 +1,35 @@ -# Ignore everything -* - -# But descend into directories -!*/ - -# Recursively allow files under subtree -!/.github/** -!/aider/** -!/benchmark/** -!/docker/** -!/requirements/** -!/scripts/** -!/tests/** - -# Specific Files -!/.dockerignore -!/.flake8 -!/.gitignore -!/.pre-commit-config.yaml -!/CNAME -!/CONTRIBUTING.metadata -!/HISTORY.md -!/LICENSE.txt -!/MANIFEST.in -!/pyproject.toml -!/pytest.ini -!/README.md -!/requirements.txt - -# Ignore specific files -aider/__version__.py -aider/_version.py -*.pyc +# Ignore everything +* + +# But descend into directories +!*/ + +# Recursively allow files under subtree +!/.github/** +!/aider/** +!/benchmark/** +!/docker/** +!/requirements/** +!/scripts/** +!/tests/** + +# Specific Files +!/.dockerignore +!/.flake8 +!/.gitignore +!/.pre-commit-config.yaml +!/CNAME +!/CONTRIBUTING.metadata +!/HISTORY.md +!/LICENSE.txt +!/MANIFEST.in +!/pyproject.toml +!/pytest.ini +!/README.md +!/requirements.txt + +# Ignore specific files +aider/__version__.py +aider/_version.py +*.pyc +.aider* From fc7bffaf4decc6003d2cff550bd2a95424627b96 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 15:37:59 -0800 Subject: [PATCH 07/17] feat: Fix line endings in changed files since v0.88.10 Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro) --- fix_line_endings.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 fix_line_endings.py diff --git a/fix_line_endings.py b/fix_line_endings.py new file mode 100644 index 00000000000..6410824268a --- /dev/null +++ b/fix_line_endings.py @@ -0,0 +1,26 @@ +import sys + +def fix_line_endings(file_path): + """ + Converts the line endings of a file from CRLF to LF. 
+ """ + try: + with open(file_path, 'r', newline='', encoding='utf-8') as f: + content = f.read() + + with open(file_path, 'w', newline='\n', encoding='utf-8') as f: + f.write(content) + + print(f"Successfully converted line endings for: {file_path}") + + except Exception as e: + print(f"Error processing file {file_path}: {e}", file=sys.stderr) + sys.exit(1) + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python fix_line_endings.py ", file=sys.stderr) + sys.exit(1) + + file_to_fix = sys.argv[1] + fix_line_endings(file_to_fix) From bee371eae163cac15cbe9f3527fb46b818ffcfec Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 16:29:17 -0800 Subject: [PATCH 08/17] converted line endings from windows to unix --- .gitignore | 74 +- aider/exceptions.py | 230 +-- aider/models.py | 2666 ++++++++++++++++---------------- tests/basic/test_exceptions.py | 194 +-- 4 files changed, 1582 insertions(+), 1582 deletions(-) diff --git a/.gitignore b/.gitignore index 81b87692bd2..d2cffc639ff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,37 +1,37 @@ -# Ignore everything -* - -# But descend into directories -!*/ - -# Recursively allow files under subtree -!/.github/** -!/aider/** -!/benchmark/** -!/docker/** -!/requirements/** -!/scripts/** -!/tests/** - -# Specific Files -!/.dockerignore -!/.flake8 -!/.gitignore -!/.pre-commit-config.yaml -!/CHANGELOG.md -!/CNAME -!/CONTRIBUTING.metadata -!/HISTORY.md -!/LICENSE.txt -!/MANIFEST.in -!/pyproject.toml -!/pytest.ini -!/README.md -!/requirements.txt - -# Ignore specific files -aider/__version__.py -aider/_version.py -*.pyc -.aider* -env/ +# Ignore everything +* + +# But descend into directories +!*/ + +# Recursively allow files under subtree +!/.github/** +!/aider/** +!/benchmark/** +!/docker/** +!/requirements/** +!/scripts/** +!/tests/** + +# Specific Files +!/.dockerignore +!/.flake8 +!/.gitignore +!/.pre-commit-config.yaml +!/CHANGELOG.md +!/CNAME +!/CONTRIBUTING.metadata +!/HISTORY.md +!/LICENSE.txt +!/MANIFEST.in +!/pyproject.toml +!/pytest.ini +!/README.md +!/requirements.txt + +# Ignore specific files +aider/__version__.py +aider/_version.py +*.pyc +.aider* +env/ diff --git a/aider/exceptions.py b/aider/exceptions.py index b158b2d0524..5fb84d992c6 100644 --- a/aider/exceptions.py +++ b/aider/exceptions.py @@ -1,115 +1,115 @@ -from dataclasses import dataclass - -from aider.dump import dump # noqa: F401 - - -@dataclass -class ExInfo: - name: str - retry: bool - description: str - - -EXCEPTIONS = [ - ExInfo("APIConnectionError", True, None), - ExInfo("APIError", True, None), - ExInfo("APIResponseValidationError", True, None), - ExInfo( - "AuthenticationError", - False, - "The API provider is not able to authenticate you. 
Check your API key.", - ), - ExInfo("AzureOpenAIError", True, None), - ExInfo("BadGatewayError", False, None), - ExInfo("BadRequestError", False, None), - ExInfo("BudgetExceededError", True, None), - ExInfo( - "ContentPolicyViolationError", - True, - "The API provider has refused the request due to a safety policy about the content.", - ), - ExInfo("ContextWindowExceededError", False, None), # special case handled in base_coder - ExInfo("ErrorEventError", True, None), - ExInfo("ImageFetchError", True, "The API cannot fetch an image"), - ExInfo("InternalServerError", True, "The API provider's servers are down or overloaded."), - ExInfo("InvalidRequestError", True, None), - ExInfo("JSONSchemaValidationError", True, None), - ExInfo("NotFoundError", False, None), - ExInfo("OpenAIError", True, None), - ExInfo( - "RateLimitError", - True, - "The API provider has rate limited you. Try again later or check your quotas.", - ), - ExInfo("RouterRateLimitError", True, None), - ExInfo("ServiceUnavailableError", True, "The API provider's servers are down or overloaded."), - ExInfo("UnprocessableEntityError", True, None), - ExInfo("UnsupportedParamsError", True, None), - ExInfo( - "Timeout", - True, - "The API provider timed out without returning a response. They may be down or overloaded.", - ), -] - - -class LiteLLMExceptions: - exceptions = dict() - exception_info = {exi.name: exi for exi in EXCEPTIONS} - - def __init__(self): - self._load() - - def _load(self, strict=False): - import litellm - - for var in dir(litellm): - if var.endswith("Error"): - if var not in self.exception_info: - raise ValueError(f"{var} is in litellm but not in aider's exceptions list") - - for var in self.exception_info: - ex = getattr(litellm, var, "default") - - if ex != "default": - if not issubclass(ex, BaseException): - continue - - self.exceptions[ex] = self.exception_info[var] - - def exceptions_tuple(self): - return tuple(self.exceptions) - - def get_ex_info(self, ex): - """Return the ExInfo for a given exception instance""" - import litellm - - if ex.__class__ is litellm.APIConnectionError: - if "google.auth" in str(ex): - return ExInfo( - "APIConnectionError", False, "You need to: pip install google-generativeai" - ) - if "boto3" in str(ex): - return ExInfo("APIConnectionError", False, "You need to: pip install boto3") - if "OpenrouterException" in str(ex) and "'choices'" in str(ex): - return ExInfo( - "APIConnectionError", - True, - ( - "OpenRouter or the upstream API provider is down, overloaded or rate" - " limiting your requests." - ), - ) - - # Check for specific non-retryable APIError cases like insufficient credits - if ex.__class__ is litellm.APIError: - err_str = str(ex).lower() - if "insufficient credits" in err_str and '"code":402' in err_str: - return ExInfo( - "APIError", - False, - "Insufficient credits with the API provider. Please add credits.", - ) - # Fall through to default APIError handling if not the specific credits error - - return self.exceptions.get(ex.__class__, ExInfo(None, None, None)) +from dataclasses import dataclass + +from aider.dump import dump # noqa: F401 + + +@dataclass +class ExInfo: + name: str + retry: bool + description: str + + +EXCEPTIONS = [ + ExInfo("APIConnectionError", True, None), + ExInfo("APIError", True, None), + ExInfo("APIResponseValidationError", True, None), + ExInfo( + "AuthenticationError", + False, + "The API provider is not able to authenticate you. 
Check your API key.", + ), + ExInfo("AzureOpenAIError", True, None), + ExInfo("BadGatewayError", False, None), + ExInfo("BadRequestError", False, None), + ExInfo("BudgetExceededError", True, None), + ExInfo( + "ContentPolicyViolationError", + True, + "The API provider has refused the request due to a safety policy about the content.", + ), + ExInfo("ContextWindowExceededError", False, None), # special case handled in base_coder + ExInfo("ErrorEventError", True, None), + ExInfo("ImageFetchError", True, "The API cannot fetch an image"), + ExInfo("InternalServerError", True, "The API provider's servers are down or overloaded."), + ExInfo("InvalidRequestError", True, None), + ExInfo("JSONSchemaValidationError", True, None), + ExInfo("NotFoundError", False, None), + ExInfo("OpenAIError", True, None), + ExInfo( + "RateLimitError", + True, + "The API provider has rate limited you. Try again later or check your quotas.", + ), + ExInfo("RouterRateLimitError", True, None), + ExInfo("ServiceUnavailableError", True, "The API provider's servers are down or overloaded."), + ExInfo("UnprocessableEntityError", True, None), + ExInfo("UnsupportedParamsError", True, None), + ExInfo( + "Timeout", + True, + "The API provider timed out without returning a response. They may be down or overloaded.", + ), +] + + +class LiteLLMExceptions: + exceptions = dict() + exception_info = {exi.name: exi for exi in EXCEPTIONS} + + def __init__(self): + self._load() + + def _load(self, strict=False): + import litellm + + for var in dir(litellm): + if var.endswith("Error"): + if var not in self.exception_info: + raise ValueError(f"{var} is in litellm but not in aider's exceptions list") + + for var in self.exception_info: + ex = getattr(litellm, var, "default") + + if ex != "default": + if not issubclass(ex, BaseException): + continue + + self.exceptions[ex] = self.exception_info[var] + + def exceptions_tuple(self): + return tuple(self.exceptions) + + def get_ex_info(self, ex): + """Return the ExInfo for a given exception instance""" + import litellm + + if ex.__class__ is litellm.APIConnectionError: + if "google.auth" in str(ex): + return ExInfo( + "APIConnectionError", False, "You need to: pip install google-generativeai" + ) + if "boto3" in str(ex): + return ExInfo("APIConnectionError", False, "You need to: pip install boto3") + if "OpenrouterException" in str(ex) and "'choices'" in str(ex): + return ExInfo( + "APIConnectionError", + True, + ( + "OpenRouter or the upstream API provider is down, overloaded or rate" + " limiting your requests." + ), + ) + + # Check for specific non-retryable APIError cases like insufficient credits + if ex.__class__ is litellm.APIError: + err_str = str(ex).lower() + if "insufficient credits" in err_str and '"code":402' in err_str: + return ExInfo( + "APIError", + False, + "Insufficient credits with the API provider. 
Please add credits.", + ) + # Fall through to default APIError handling if not the specific credits error + + return self.exceptions.get(ex.__class__, ExInfo(None, None, None)) diff --git a/aider/models.py b/aider/models.py index 3456d8b913e..9d210ebc4e3 100644 --- a/aider/models.py +++ b/aider/models.py @@ -1,1333 +1,1333 @@ -import asyncio -import difflib -import hashlib -import importlib.resources -import json -import math -import os -import platform -import sys -import time -from dataclasses import dataclass, fields -from pathlib import Path -from typing import Optional, Union - -import json5 -import yaml -from PIL import Image - -from aider import __version__ -from aider.dump import dump # noqa: F401 -from aider.helpers.requests import model_request_parser -from aider.llm import litellm -from aider.openrouter import OpenRouterModelManager -from aider.sendchat import sanity_check_messages -from aider.utils import check_pip_install_extra - -RETRY_TIMEOUT = 60 - -request_timeout = 600 - -DEFAULT_MODEL_NAME = "gpt-4o" -ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25" - -OPENAI_MODELS = """ -o1 -o1-preview -o1-mini -o3-mini -gpt-4 -gpt-4o -gpt-4o-2024-05-13 -gpt-4-turbo-preview -gpt-4-0314 -gpt-4-0613 -gpt-4-32k -gpt-4-32k-0314 -gpt-4-32k-0613 -gpt-4-turbo -gpt-4-turbo-2024-04-09 -gpt-4-1106-preview -gpt-4-0125-preview -gpt-4-vision-preview -gpt-4-1106-vision-preview -gpt-4o-mini -gpt-4o-mini-2024-07-18 -gpt-3.5-turbo -gpt-3.5-turbo-0301 -gpt-3.5-turbo-0613 -gpt-3.5-turbo-1106 -gpt-3.5-turbo-0125 -gpt-3.5-turbo-16k -gpt-3.5-turbo-16k-0613 -""" - -OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()] - -ANTHROPIC_MODELS = """ -claude-2 -claude-2.1 -claude-3-haiku-20240307 -claude-3-5-haiku-20241022 -claude-3-opus-20240229 -claude-3-sonnet-20240229 -claude-3-5-sonnet-20240620 -claude-3-5-sonnet-20241022 -claude-sonnet-4-20250514 -claude-opus-4-20250514 -""" - -ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()] - -# Mapping of model aliases to their canonical names -MODEL_ALIASES = { - # Claude models - "sonnet": "anthropic/claude-sonnet-4-20250514", - "haiku": "claude-3-5-haiku-20241022", - "opus": "claude-opus-4-20250514", - # GPT models - "4": "gpt-4-0613", - "4o": "gpt-4o", - "4-turbo": "gpt-4-1106-preview", - "35turbo": "gpt-3.5-turbo", - "35-turbo": "gpt-3.5-turbo", - "3": "gpt-3.5-turbo", - # Other models - "deepseek": "deepseek/deepseek-chat", - "flash": "gemini/gemini-2.5-flash", - "flash-lite": "gemini/gemini-2.5-flash-lite", - "quasar": "openrouter/openrouter/quasar-alpha", - "r1": "deepseek/deepseek-reasoner", - "gemini-2.5-pro": "gemini/gemini-2.5-pro", - "gemini-3-pro-preview": "gemini/gemini-3-pro-preview", - "gemini": "gemini/gemini-3-pro-preview", - "gemini-exp": "gemini/gemini-2.5-pro-exp-03-25", - "grok3": "xai/grok-3-beta", - "optimus": "openrouter/openrouter/optimus-alpha", -} -# Model metadata loaded from resources and user's files. 
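As a quick illustration of how the MODEL_ALIASES table a few lines above is consumed, Model.__init__ (later in this diff) resolves an alias with a plain dict lookup and leaves unknown names untouched. A minimal sketch (the helper name is illustrative; the two aliases are taken from the table above):

    MODEL_ALIASES = {
        "sonnet": "anthropic/claude-sonnet-4-20250514",
        "r1": "deepseek/deepseek-reasoner",
    }

    def resolve_alias(name):
        # Known aliases map to canonical ids; anything else passes through unchanged.
        return MODEL_ALIASES.get(name, name)

    assert resolve_alias("r1") == "deepseek/deepseek-reasoner"
    assert resolve_alias("gpt-4o") == "gpt-4o"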
- - -@dataclass -class ModelSettings: - # Model class needs to have each of these as well - name: str - edit_format: str = "whole" - weak_model_name: Optional[str] = None - use_repo_map: bool = False - send_undo_reply: bool = False - lazy: bool = False - overeager: bool = False - reminder: str = "user" - examples_as_sys_msg: bool = False - extra_params: Optional[dict] = None - cache_control: bool = False - caches_by_default: bool = False - use_system_prompt: bool = True - use_temperature: Union[bool, float] = True - streaming: bool = True - editor_model_name: Optional[str] = None - editor_edit_format: Optional[str] = None - reasoning_tag: Optional[str] = None - remove_reasoning: Optional[str] = None # Deprecated alias for reasoning_tag - system_prompt_prefix: Optional[str] = None - accepts_settings: Optional[list] = None - - -# Load model settings from package resource -MODEL_SETTINGS = [] -with importlib.resources.open_text("aider.resources", "model-settings.yml") as f: - model_settings_list = yaml.safe_load(f) - for model_settings_dict in model_settings_list: - MODEL_SETTINGS.append(ModelSettings(**model_settings_dict)) - - -class ModelInfoManager: - MODEL_INFO_URL = ( - "https://raw.githubusercontent.com/BerriAI/litellm/main/" - "model_prices_and_context_window.json" - ) - CACHE_TTL = 60 * 60 * 24 # 24 hours - - def __init__(self): - self.cache_dir = Path.home() / ".aider" / "caches" - self.cache_file = self.cache_dir / "model_prices_and_context_window.json" - self.content = None - self.local_model_metadata = {} - self.verify_ssl = True - self._cache_loaded = False - - # Manager for the cached OpenRouter model database - self.openrouter_manager = OpenRouterModelManager() - - def set_verify_ssl(self, verify_ssl): - self.verify_ssl = verify_ssl - if hasattr(self, "openrouter_manager"): - self.openrouter_manager.set_verify_ssl(verify_ssl) - - def _load_cache(self): - if self._cache_loaded: - return - - try: - self.cache_dir.mkdir(parents=True, exist_ok=True) - if self.cache_file.exists(): - cache_age = time.time() - self.cache_file.stat().st_mtime - if cache_age < self.CACHE_TTL: - try: - self.content = json.loads(self.cache_file.read_text()) - except json.JSONDecodeError: - # If the cache file is corrupted, treat it as missing - self.content = None - except OSError: - pass - - self._cache_loaded = True - - def _update_cache(self): - try: - import requests - - # Respect the --no-verify-ssl switch - response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl) - if response.status_code == 200: - self.content = response.json() - try: - self.cache_file.write_text(json.dumps(self.content, indent=4)) - except OSError: - pass - except Exception as ex: - print(str(ex)) - try: - # Save empty dict to cache file on failure - self.cache_file.write_text("{}") - except OSError: - pass - - def get_model_from_cached_json_db(self, model): - data = self.local_model_metadata.get(model) - if data: - return data - - # Ensure cache is loaded before checking content - self._load_cache() - - if not self.content: - self._update_cache() - - if not self.content: - return dict() - - info = self.content.get(model, dict()) - if info: - return info - - pieces = model.split("/") - if len(pieces) == 2: - info = self.content.get(pieces[1]) - if info and info.get("litellm_provider") == pieces[0]: - return info - - return dict() - - def get_model_info(self, model): - cached_info = self.get_model_from_cached_json_db(model) - - litellm_info = None - if litellm._lazy_module or not cached_info: - try: - 
litellm_info = litellm.get_model_info(model) - except Exception as ex: - if "model_prices_and_context_window.json" not in str(ex): - print(str(ex)) - - if litellm_info: - return litellm_info - - if not cached_info and model.startswith("openrouter/"): - # First try using the locally cached OpenRouter model database - openrouter_info = self.openrouter_manager.get_model_info(model) - if openrouter_info: - return openrouter_info - - # Fallback to legacy web-scraping if the API cache does not contain the model - openrouter_info = self.fetch_openrouter_model_info(model) - if openrouter_info: - return openrouter_info - - return cached_info - - def fetch_openrouter_model_info(self, model): - """ - Fetch model info by scraping the openrouter model page. - Expected URL: https://openrouter.ai/ - Example: openrouter/qwen/qwen-2.5-72b-instruct:free - Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, - input_cost_per_token, output_cost_per_token. - """ - url_part = model[len("openrouter/") :] - url = "https://openrouter.ai/" + url_part - try: - import requests - - response = requests.get(url, timeout=5, verify=self.verify_ssl) - if response.status_code != 200: - return {} - html = response.text - import re - - if re.search( - rf"The model\s*.*{re.escape(url_part)}.* is not available", html, re.IGNORECASE - ): - print(f"\033[91mError: Model '{url_part}' is not available\033[0m") - return {} - text = re.sub(r"<[^>]+>", " ", html) - context_match = re.search(r"([\d,]+)\s*context", text) - if context_match: - context_str = context_match.group(1).replace(",", "") - context_size = int(context_str) - else: - context_size = None - input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE) - output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE) - input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None - output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None - if context_size is None or input_cost is None or output_cost is None: - return {} - params = { - "max_input_tokens": context_size, - "max_tokens": context_size, - "max_output_tokens": context_size, - "input_cost_per_token": input_cost, - "output_cost_per_token": output_cost, - } - return params - except Exception as e: - print("Error fetching openrouter info:", str(e)) - return {} - - -model_info_manager = ModelInfoManager() - - -class Model(ModelSettings): - def __init__( - self, model, weak_model=None, editor_model=None, editor_edit_format=None, verbose=False - ): - # Map any alias to its canonical name - model = MODEL_ALIASES.get(model, model) - - self.name = model - self.verbose = verbose - - self.max_chat_history_tokens = 1024 - self.weak_model = None - self.editor_model = None - - # Find the extra settings - self.extra_model_settings = next( - (ms for ms in MODEL_SETTINGS if ms.name == "aider/extra_params"), None - ) - - self.info = self.get_model_info(model) - - # Are all needed keys/params available? 
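The chat-history budget set a few lines below follows a simple clamp: one sixteenth of the model's max_input_tokens, never below 1024 and never above 8192 tokens. A worked sketch (the helper name and the context sizes are illustrative):

    def chat_history_budget(max_input_tokens):
        # 1/16th of the context window, clamped to the 1k-8k range.
        return min(max(max_input_tokens / 16, 1024), 8192)

    assert chat_history_budget(8192) == 1024     # small contexts floor at 1k
    assert chat_history_budget(128000) == 8000   # 128k context -> 8k of history
    assert chat_history_budget(200000) == 8192   # very large contexts cap at 8k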
- res = self.validate_environment() - self.missing_keys = res.get("missing_keys") - self.keys_in_environment = res.get("keys_in_environment") - - max_input_tokens = self.info.get("max_input_tokens") or 0 - # Calculate max_chat_history_tokens as 1/16th of max_input_tokens, - # with minimum 1k and maximum 8k - self.max_chat_history_tokens = min(max(max_input_tokens / 16, 1024), 8192) - - self.configure_model_settings(model) - if weak_model is False: - self.weak_model_name = None - else: - self.get_weak_model(weak_model) - - if editor_model is False: - self.editor_model_name = None - else: - self.get_editor_model(editor_model, editor_edit_format) - - def get_model_info(self, model): - return model_info_manager.get_model_info(model) - - def _copy_fields(self, source): - """Helper to copy fields from a ModelSettings instance to self""" - for field in fields(ModelSettings): - val = getattr(source, field.name) - setattr(self, field.name, val) - - # Handle backward compatibility: if remove_reasoning is set but reasoning_tag isn't, - # use remove_reasoning's value for reasoning_tag - if self.reasoning_tag is None and self.remove_reasoning is not None: - self.reasoning_tag = self.remove_reasoning - - def configure_model_settings(self, model): - # Look for exact model match - exact_match = False - for ms in MODEL_SETTINGS: - # direct match, or match "provider/" - if model == ms.name: - self._copy_fields(ms) - exact_match = True - break # Continue to apply overrides - - # Initialize accepts_settings if it's None - if self.accepts_settings is None: - self.accepts_settings = [] - - model = model.lower() - - # If no exact match, try generic settings - if not exact_match: - self.apply_generic_model_settings(model) - - # Apply override settings last if they exist - if ( - self.extra_model_settings - and self.extra_model_settings.extra_params - and self.extra_model_settings.name == "aider/extra_params" - ): - # Initialize extra_params if it doesn't exist - if not self.extra_params: - self.extra_params = {} - - # Deep merge the extra_params dicts - for key, value in self.extra_model_settings.extra_params.items(): - if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): - # For nested dicts, merge recursively - self.extra_params[key] = {**self.extra_params[key], **value} - else: - # For non-dict values, simply update - self.extra_params[key] = value - - # Ensure OpenRouter models accept thinking_tokens and reasoning_effort - if self.name.startswith("openrouter/"): - if self.accepts_settings is None: - self.accepts_settings = [] - if "thinking_tokens" not in self.accepts_settings: - self.accepts_settings.append("thinking_tokens") - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - - def apply_generic_model_settings(self, model): - if "/o3-mini" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.system_prompt_prefix = "Formatting re-enabled. " - self.system_prompt_prefix = "Formatting re-enabled. 
" - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "gpt-4.1-mini" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = False - return # <-- - - if "gpt-4.1" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = False - return # <-- - - last_segment = model.split("/")[-1] - if last_segment in ("gpt-5", "gpt-5-2025-08-07") or "gpt-5.1" in model: - self.use_temperature = False - self.edit_format = "diff" - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "/o1-mini" in model: - self.use_repo_map = True - self.use_temperature = False - self.use_system_prompt = False - return # <-- - - if "/o1-preview" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.use_system_prompt = False - return # <-- - - if "/o1" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.streaming = False - self.system_prompt_prefix = "Formatting re-enabled. " - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "deepseek" in model and "v3" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = True - return # <-- - - if "deepseek" in model and ("r1" in model or "reasoning" in model): - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.use_temperature = False - self.reasoning_tag = "think" - return # <-- - - if ("llama3" in model or "llama-3" in model) and "70b" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.send_undo_reply = True - self.examples_as_sys_msg = True - return # <-- - - if "gpt-4-turbo" in model or ("gpt-4-" in model and "-preview" in model): - self.edit_format = "udiff" - self.use_repo_map = True - self.send_undo_reply = True - return # <-- - - if "gpt-4" in model or "claude-3-opus" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.send_undo_reply = True - return # <-- - - if "gpt-3.5" in model or "gpt-4" in model: - self.reminder = "sys" - return # <-- - - if "3-7-sonnet" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.reminder = "user" - if "thinking_tokens" not in self.accepts_settings: - self.accepts_settings.append("thinking_tokens") - return # <-- - - if "3.5-sonnet" in model or "3-5-sonnet" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.reminder = "user" - return # <-- - - if model.startswith("o1-") or "/o1-" in model: - self.use_system_prompt = False - self.use_temperature = False - return # <-- - - if ( - "qwen" in model - and "coder" in model - and ("2.5" in model or "2-5" in model) - and "32b" in model - ): - self.edit_format = "diff" - self.editor_edit_format = "editor-diff" - self.use_repo_map = True - return # <-- - - if "qwq" in model and "32b" in model and "preview" not in model: - self.edit_format = "diff" - self.editor_edit_format = "editor-diff" - self.use_repo_map = True - self.reasoning_tag = "think" - self.examples_as_sys_msg = True - self.use_temperature = 0.6 - self.extra_params = dict(top_p=0.95) - return # <-- - - if "qwen3" in model: - self.edit_format = "diff" - 
self.use_repo_map = True - if "235b" in model: - self.system_prompt_prefix = "/no_think" - self.use_temperature = 0.7 - self.extra_params = {"top_p": 0.8, "top_k": 20, "min_p": 0.0} - else: - self.examples_as_sys_msg = True - self.use_temperature = 0.6 - self.reasoning_tag = "think" - self.extra_params = {"top_p": 0.95, "top_k": 20, "min_p": 0.0} - return # <-- - - # use the defaults - if self.edit_format == "diff": - self.use_repo_map = True - return # <-- - - def __str__(self): - return self.name - - def get_weak_model(self, provided_weak_model_name): - # If weak_model_name is provided, override the model settings - if provided_weak_model_name: - self.weak_model_name = provided_weak_model_name - - if not self.weak_model_name: - self.weak_model = self - return - - if self.weak_model_name == self.name: - self.weak_model = self - return - - self.weak_model = Model( - self.weak_model_name, - weak_model=False, - ) - return self.weak_model - - def commit_message_models(self): - return [self.weak_model, self] - - def get_editor_model(self, provided_editor_model_name, editor_edit_format): - # If editor_model_name is provided, override the model settings - if provided_editor_model_name: - self.editor_model_name = provided_editor_model_name - if editor_edit_format: - self.editor_edit_format = editor_edit_format - - if not self.editor_model_name or self.editor_model_name == self.name: - self.editor_model = self - else: - self.editor_model = Model( - self.editor_model_name, - editor_model=False, - ) - - if not self.editor_edit_format: - self.editor_edit_format = self.editor_model.edit_format - if self.editor_edit_format in ("diff", "whole", "diff-fenced"): - self.editor_edit_format = "editor-" + self.editor_edit_format - - return self.editor_model - - def tokenizer(self, text): - return litellm.encode(model=self.name, text=text) - - def token_count(self, messages): - if isinstance(messages, dict): - messages = [messages] - - if isinstance(messages, list): - try: - return litellm.token_counter(model=self.name, messages=messages) - except Exception: - pass # fall back to raw tokenizer - - if not self.tokenizer: - return 0 - - if isinstance(messages, str): - msgs = messages - else: - msgs = json.dumps(messages) - - try: - return len(self.tokenizer(msgs)) - except Exception as err: - print(f"Unable to count tokens with tokenizer: {err}") - return 0 - - def token_count_for_image(self, fname): - """ - Calculate the token cost for an image assuming high detail. - The token cost is determined by the size of the image. - :param fname: The filename of the image. - :return: The token cost for the image. 
- """ - width, height = self.get_image_size(fname) - - # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 - max_dimension = max(width, height) - if max_dimension > 2048: - scale_factor = 2048 / max_dimension - width = int(width * scale_factor) - height = int(height * scale_factor) - - # Scale the image such that the shortest side is 768 pixels long - min_dimension = min(width, height) - scale_factor = 768 / min_dimension - width = int(width * scale_factor) - height = int(height * scale_factor) - - # Calculate the number of 512x512 tiles needed to cover the image - tiles_width = math.ceil(width / 512) - tiles_height = math.ceil(height / 512) - num_tiles = tiles_width * tiles_height - - # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens - token_cost = num_tiles * 170 + 85 - return token_cost - - def get_image_size(self, fname): - """ - Retrieve the size of an image. - :param fname: The filename of the image. - :return: A tuple (width, height) representing the image size in pixels. - """ - with Image.open(fname) as img: - return img.size - - def fast_validate_environment(self): - """Fast path for common models. Avoids forcing litellm import.""" - - model = self.name - - pieces = model.split("/") - if len(pieces) > 1: - provider = pieces[0] - else: - provider = None - - keymap = dict( - openrouter="OPENROUTER_API_KEY", - openai="OPENAI_API_KEY", - deepseek="DEEPSEEK_API_KEY", - gemini="GEMINI_API_KEY", - anthropic="ANTHROPIC_API_KEY", - groq="GROQ_API_KEY", - fireworks_ai="FIREWORKS_API_KEY", - ) - var = None - if model in OPENAI_MODELS: - var = "OPENAI_API_KEY" - elif model in ANTHROPIC_MODELS: - var = "ANTHROPIC_API_KEY" - else: - var = keymap.get(provider) - - if var and os.environ.get(var): - return dict(keys_in_environment=[var], missing_keys=[]) - - def validate_environment(self): - res = self.fast_validate_environment() - if res: - return res - - # https://github.com/BerriAI/litellm/issues/3190 - - model = self.name - res = litellm.validate_environment(model) - - # If missing AWS credential keys but AWS_PROFILE is set, consider AWS credentials valid - if res["missing_keys"] and any( - key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] for key in res["missing_keys"] - ): - if model.startswith("bedrock/") or model.startswith("us.anthropic."): - if os.environ.get("AWS_PROFILE"): - res["missing_keys"] = [ - k - for k in res["missing_keys"] - if k not in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] - ] - if not res["missing_keys"]: - res["keys_in_environment"] = True - - if res["keys_in_environment"]: - return res - if res["missing_keys"]: - return res - - provider = self.info.get("litellm_provider", "").lower() - if provider == "cohere_chat": - return validate_variables(["COHERE_API_KEY"]) - if provider == "gemini": - return validate_variables(["GEMINI_API_KEY"]) - if provider == "groq": - return validate_variables(["GROQ_API_KEY"]) - - return res - - def get_repo_map_tokens(self): - map_tokens = 1024 - max_inp_tokens = self.info.get("max_input_tokens") - if max_inp_tokens: - map_tokens = max_inp_tokens / 8 - map_tokens = min(map_tokens, 4096) - map_tokens = max(map_tokens, 1024) - return map_tokens - - def set_reasoning_effort(self, effort): - """Set the reasoning effort parameter for models that support it""" - if effort is not None: - if self.name.startswith("openrouter/"): - if not self.extra_params: - self.extra_params = {} - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - 
self.extra_params["extra_body"]["reasoning"] = {"effort": effort} - else: - if not self.extra_params: - self.extra_params = {} - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - self.extra_params["extra_body"]["reasoning_effort"] = effort - - def parse_token_value(self, value): - """ - Parse a token value string into an integer. - Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. - - Args: - value: String or int token value - - Returns: - Integer token value - """ - if isinstance(value, int): - return value - - if not isinstance(value, str): - return int(value) # Try to convert to int - - value = value.strip().upper() - - if value.endswith("K"): - multiplier = 1024 - value = value[:-1] - elif value.endswith("M"): - multiplier = 1024 * 1024 - value = value[:-1] - else: - multiplier = 1 - - # Convert to float first to handle decimal values like "10.5k" - return int(float(value) * multiplier) - - def set_thinking_tokens(self, value): - """ - Set the thinking token budget for models that support it. - Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. - Pass "0" to disable thinking tokens. - """ - if value is not None: - num_tokens = self.parse_token_value(value) - self.use_temperature = False - if not self.extra_params: - self.extra_params = {} - - # OpenRouter models use 'reasoning' instead of 'thinking' - if self.name.startswith("openrouter/"): - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - if num_tokens > 0: - self.extra_params["extra_body"]["reasoning"] = {"max_tokens": num_tokens} - else: - if "reasoning" in self.extra_params["extra_body"]: - del self.extra_params["extra_body"]["reasoning"] - else: - if num_tokens > 0: - self.extra_params["thinking"] = {"type": "enabled", "budget_tokens": num_tokens} - else: - if "thinking" in self.extra_params: - del self.extra_params["thinking"] - - def get_raw_thinking_tokens(self): - """Get formatted thinking token budget if available""" - budget = None - - if self.extra_params: - # Check for OpenRouter reasoning format - if self.name.startswith("openrouter/"): - if ( - "extra_body" in self.extra_params - and "reasoning" in self.extra_params["extra_body"] - and "max_tokens" in self.extra_params["extra_body"]["reasoning"] - ): - budget = self.extra_params["extra_body"]["reasoning"]["max_tokens"] - # Check for standard thinking format - elif ( - "thinking" in self.extra_params and "budget_tokens" in self.extra_params["thinking"] - ): - budget = self.extra_params["thinking"]["budget_tokens"] - - return budget - - def get_thinking_tokens(self): - budget = self.get_raw_thinking_tokens() - - if budget is not None: - # Format as xx.yK for thousands, xx.yM for millions - if budget >= 1024 * 1024: - value = budget / (1024 * 1024) - if value == int(value): - return f"{int(value)}M" - else: - return f"{value:.1f}M" - else: - value = budget / 1024 - if value == int(value): - return f"{int(value)}k" - else: - return f"{value:.1f}k" - return None - - def get_reasoning_effort(self): - """Get reasoning effort value if available""" - if self.extra_params: - # Check for OpenRouter reasoning format - if self.name.startswith("openrouter/"): - if ( - "extra_body" in self.extra_params - and "reasoning" in self.extra_params["extra_body"] - and "effort" in self.extra_params["extra_body"]["reasoning"] - ): - return self.extra_params["extra_body"]["reasoning"]["effort"] - # Check for standard reasoning_effort format (e.g. 
in extra_body) - elif ( - "extra_body" in self.extra_params - and "reasoning_effort" in self.extra_params["extra_body"] - ): - return self.extra_params["extra_body"]["reasoning_effort"] - return None - - def is_deepseek(self): - name = self.name.lower() - if "deepseek" not in name: - return - return True - - def is_ollama(self): - return self.name.startswith("ollama/") or self.name.startswith("ollama_chat/") - - async def send_completion( - self, messages, functions, stream, temperature=None, tools=None, max_tokens=None - ): - if os.environ.get("AIDER_SANITY_CHECK_TURNS"): - sanity_check_messages(messages) - - messages = model_request_parser(self, messages) - - if self.verbose: - for message in messages: - msg_role = message.get("role") - msg_content = message.get("content") if message.get("content") else "" - msg_trunc = "" - - if message.get("content"): - msg_trunc = message.get("content")[:30] - - print(f"{msg_role} ({len(msg_content)}): {msg_trunc}") - - kwargs = dict(model=self.name, stream=stream) - - if self.use_temperature is not False: - if temperature is None: - if isinstance(self.use_temperature, bool): - temperature = 0 - else: - temperature = float(self.use_temperature) - - kwargs["temperature"] = temperature - - # `tools` is for modern tool usage. `functions` is for legacy/forced calls. - # This handles `base_coder` sending both with same content for `navigator_coder`. - effective_tools = [] - if tools: - effective_tools.extend(tools) - - if functions: - # Convert legacy `functions` to `tools` format and add them - effective_tools.extend([dict(type="function", function=f) for f in functions]) - - if effective_tools: - # Deduplicate tools based on function name - seen_tool_names = set() - deduped_tools = [] - for tool in effective_tools: - tool_name = tool.get("function", {}).get("name") - if tool_name and tool_name not in seen_tool_names: - deduped_tools.append(tool) - seen_tool_names.add(tool_name) - effective_tools = deduped_tools - kwargs["tools"] = effective_tools - - # Forcing a function call is for legacy style `functions` with a single function. - # This is used by ArchitectCoder and not intended for NavigatorCoder's tools. - if functions and len(functions) == 1: - function = functions[0] - - if "name" in function: - tool_name = function.get("name") - if tool_name: - kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_name}} - - if self.extra_params: - kwargs.update(self.extra_params) - - if max_tokens: - kwargs["max_tokens"] = max_tokens - - if "max_tokens" in kwargs and kwargs["max_tokens"]: - kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") - if self.is_ollama() and "num_ctx" not in kwargs: - num_ctx = int(self.token_count(messages) * 1.25) + 8192 - kwargs["num_ctx"] = num_ctx - - key = json.dumps(kwargs, sort_keys=True).encode() - # dump(kwargs) - - hash_object = hashlib.sha1(key) - if "timeout" not in kwargs: - kwargs["timeout"] = request_timeout - if self.verbose: - dump(kwargs) - kwargs["messages"] = messages - - # Cache System Prompts When Possible - kwargs["cache_control_injection_points"] = [ - { - "location": "message", - "role": "system", - }, - ] - - # Are we using github copilot? 
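The tool handling a little earlier in send_completion (legacy functions converted into tools entries, then the combined list deduplicated by function name) can be exercised on its own; a minimal sketch of the same dedup rule with an illustrative helper name:

    def dedupe_tools(tools):
        seen, out = set(), []
        for tool in tools:
            name = tool.get("function", {}).get("name")
            if name and name not in seen:
                out.append(tool)
                seen.add(name)
        return out

    tools = [
        {"type": "function", "function": {"name": "fetch"}},
        {"type": "function", "function": {"name": "fetch"}},   # duplicate, dropped
        {"type": "function", "function": {"name": "other"}},
    ]
    assert [t["function"]["name"] for t in dedupe_tools(tools)] == ["fetch", "other"]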
- if "GITHUB_COPILOT_TOKEN" in os.environ or self.name.startswith("github_copilot/"): - if "extra_headers" not in kwargs: - kwargs["extra_headers"] = { - "Editor-Version": f"aider/{__version__}", - "Copilot-Integration-Id": "vscode-chat", - } - - try: - res = await litellm.acompletion(**kwargs) - except Exception as err: - print(f"LiteLLM API Error: {str(err)}") - res = self.model_error_response() - - if self.verbose: - print(f"LiteLLM API Error: {str(err)}") - raise - - return hash_object, res - - async def simple_send_with_retries(self, messages, max_tokens=None): - from aider.exceptions import LiteLLMExceptions - - litellm_ex = LiteLLMExceptions() - messages = model_request_parser(self, messages) - retry_delay = 0.125 - - if self.verbose: - dump(messages) - - while True: - try: - _hash, response = await self.send_completion( - messages=messages, - functions=None, - stream=False, - max_tokens=max_tokens, - ) - if not response or not hasattr(response, "choices") or not response.choices: - return None - res = response.choices[0].message.content - from aider.reasoning_tags import remove_reasoning_content - - return remove_reasoning_content(res, self.reasoning_tag) - - except litellm_ex.exceptions_tuple() as err: - ex_info = litellm_ex.get_ex_info(err) - print(str(err)) - if ex_info.description: - print(ex_info.description) - should_retry = ex_info.retry - if should_retry: - retry_delay *= 2 - if retry_delay > RETRY_TIMEOUT: - should_retry = False - if not should_retry: - return None - print(f"Retrying in {retry_delay:.1f} seconds...") - time.sleep(retry_delay) - continue - except AttributeError: - return None - - async def model_error_response(self): - for i in range(1): - await asyncio.sleep(0.1) - yield litellm.ModelResponse( - choices=[ - litellm.Choices( - finish_reason="stop", - index=0, - message=litellm.Message( - content="Model API Response Error. Please retry the previous request" - ), # Provide an empty message object - ) - ], - model=self.name, - ) - - -def register_models(model_settings_fnames): - files_loaded = [] - for model_settings_fname in model_settings_fnames: - if not os.path.exists(model_settings_fname): - continue - - if not Path(model_settings_fname).read_text().strip(): - continue - - try: - with open(model_settings_fname, "r") as model_settings_file: - model_settings_list = yaml.safe_load(model_settings_file) - - for model_settings_dict in model_settings_list: - model_settings = ModelSettings(**model_settings_dict) - - # Remove all existing settings for this model name - MODEL_SETTINGS[:] = [ms for ms in MODEL_SETTINGS if ms.name != model_settings.name] - # Add the new settings - MODEL_SETTINGS.append(model_settings) - except Exception as e: - raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") - files_loaded.append(model_settings_fname) - - return files_loaded - - -def register_litellm_models(model_fnames): - files_loaded = [] - for model_fname in model_fnames: - if not os.path.exists(model_fname): - continue - - try: - data = Path(model_fname).read_text() - if not data.strip(): - continue - model_def = json5.loads(data) - if not model_def: - continue - - # Defer registration with litellm to faster path. 
- model_info_manager.local_model_metadata.update(model_def) - except Exception as e: - raise Exception(f"Error loading model definition from {model_fname}: {e}") - - files_loaded.append(model_fname) - - return files_loaded - - -def validate_variables(vars): - missing = [] - for var in vars: - if var not in os.environ: - missing.append(var) - if missing: - return dict(keys_in_environment=False, missing_keys=missing) - return dict(keys_in_environment=True, missing_keys=missing) - - -async def sanity_check_models(io, main_model): - problem_main = await sanity_check_model(io, main_model) - - problem_weak = None - if main_model.weak_model and main_model.weak_model is not main_model: - problem_weak = await sanity_check_model(io, main_model.weak_model) - - problem_editor = None - if ( - main_model.editor_model - and main_model.editor_model is not main_model - and main_model.editor_model is not main_model.weak_model - ): - problem_editor = await sanity_check_model(io, main_model.editor_model) - - return problem_main or problem_weak or problem_editor - - -async def sanity_check_model(io, model): - show = False - - if model.missing_keys: - show = True - io.tool_warning(f"Warning: {model} expects these environment variables") - for key in model.missing_keys: - value = os.environ.get(key, "") - status = "Set" if value else "Not set" - io.tool_output(f"- {key}: {status}") - - if platform.system() == "Windows": - io.tool_output( - "Note: You may need to restart your terminal or command prompt for `setx` to take" - " effect." - ) - - elif not model.keys_in_environment: - show = True - io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") - - # Check for model-specific dependencies - await check_for_dependencies(io, model.name) - - if not model.info: - show = True - io.tool_warning( - f"Warning for {model}: Unknown context window size and costs, using sane defaults." - ) - - possible_matches = fuzzy_match_models(model.name) - if possible_matches: - io.tool_output("Did you mean one of these?") - for match in possible_matches: - io.tool_output(f"- {match}") - - return show - - -async def check_for_dependencies(io, model_name): - """ - Check for model-specific dependencies and install them if needed. 
- - Args: - io: The IO object for user interaction - model_name: The name of the model to check dependencies for - """ - # Check if this is a Bedrock model and ensure boto3 is installed - if model_name.startswith("bedrock/"): - await check_pip_install_extra( - io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] - ) - - # Check if this is a Vertex AI model and ensure google-cloud-aiplatform is installed - elif model_name.startswith("vertex_ai/"): - await check_pip_install_extra( - io, - "google.cloud.aiplatform", - "Google Vertex AI models require the google-cloud-aiplatform package.", - ["google-cloud-aiplatform"], - ) - - -def fuzzy_match_models(name): - name = name.lower() - - chat_models = set() - model_metadata = list(litellm.model_cost.items()) - model_metadata += list(model_info_manager.local_model_metadata.items()) - - for orig_model, attrs in model_metadata: - model = orig_model.lower() - if attrs.get("mode") != "chat": - continue - provider = attrs.get("litellm_provider", "").lower() - if not provider: - continue - provider += "/" - - if model.startswith(provider): - fq_model = orig_model - else: - fq_model = provider + orig_model - - chat_models.add(fq_model) - chat_models.add(orig_model) - - chat_models = sorted(chat_models) - # exactly matching model - # matching_models = [ - # (fq,m) for fq,m in chat_models - # if name == fq or name == m - # ] - # if matching_models: - # return matching_models - - # Check for model names containing the name - matching_models = [m for m in chat_models if name in m] - if matching_models: - return sorted(set(matching_models)) - - # Check for slight misspellings - models = set(chat_models) - matching_models = difflib.get_close_matches(name, models, n=3, cutoff=0.8) - - return sorted(set(matching_models)) - - -def print_matching_models(io, search): - matches = fuzzy_match_models(search) - if matches: - io.tool_output(f'Models which match "{search}":') - for model in matches: - io.tool_output(f"- {model}") - else: - io.tool_output(f'No models match "{search}".') - - -def get_model_settings_as_yaml(): - from dataclasses import fields - - import yaml - - model_settings_list = [] - # Add default settings first with all field values - defaults = {} - for field in fields(ModelSettings): - defaults[field.name] = field.default - defaults["name"] = "(default values)" - model_settings_list.append(defaults) - - # Sort model settings by name - for ms in sorted(MODEL_SETTINGS, key=lambda x: x.name): - # Create dict with explicit field order - model_settings_dict = {} - for field in fields(ModelSettings): - value = getattr(ms, field.name) - if value != field.default: - model_settings_dict[field.name] = value - model_settings_list.append(model_settings_dict) - # Add blank line between entries - model_settings_list.append(None) - - # Filter out None values before dumping - yaml_str = yaml.dump( - [ms for ms in model_settings_list if ms is not None], - default_flow_style=False, - sort_keys=False, # Preserve field order from dataclass - ) - # Add actual blank lines between entries - return yaml_str.replace("\n- ", "\n\n- ") - - -def main(): - if len(sys.argv) < 2: - print("Usage: python models.py or python models.py --yaml") - sys.exit(1) - - if sys.argv[1] == "--yaml": - yaml_string = get_model_settings_as_yaml() - print(yaml_string) - else: - model_name = sys.argv[1] - matching_models = fuzzy_match_models(model_name) - - if matching_models: - print(f"Matching models for '{model_name}':") - for model in matching_models: - print(model) - else: - 
print(f"No matching models found for '{model_name}'.") - - -if __name__ == "__main__": - main() +import asyncio +import difflib +import hashlib +import importlib.resources +import json +import math +import os +import platform +import sys +import time +from dataclasses import dataclass, fields +from pathlib import Path +from typing import Optional, Union + +import json5 +import yaml +from PIL import Image + +from aider import __version__ +from aider.dump import dump # noqa: F401 +from aider.helpers.requests import model_request_parser +from aider.llm import litellm +from aider.openrouter import OpenRouterModelManager +from aider.sendchat import sanity_check_messages +from aider.utils import check_pip_install_extra + +RETRY_TIMEOUT = 60 + +request_timeout = 600 + +DEFAULT_MODEL_NAME = "gpt-4o" +ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25" + +OPENAI_MODELS = """ +o1 +o1-preview +o1-mini +o3-mini +gpt-4 +gpt-4o +gpt-4o-2024-05-13 +gpt-4-turbo-preview +gpt-4-0314 +gpt-4-0613 +gpt-4-32k +gpt-4-32k-0314 +gpt-4-32k-0613 +gpt-4-turbo +gpt-4-turbo-2024-04-09 +gpt-4-1106-preview +gpt-4-0125-preview +gpt-4-vision-preview +gpt-4-1106-vision-preview +gpt-4o-mini +gpt-4o-mini-2024-07-18 +gpt-3.5-turbo +gpt-3.5-turbo-0301 +gpt-3.5-turbo-0613 +gpt-3.5-turbo-1106 +gpt-3.5-turbo-0125 +gpt-3.5-turbo-16k +gpt-3.5-turbo-16k-0613 +""" + +OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()] + +ANTHROPIC_MODELS = """ +claude-2 +claude-2.1 +claude-3-haiku-20240307 +claude-3-5-haiku-20241022 +claude-3-opus-20240229 +claude-3-sonnet-20240229 +claude-3-5-sonnet-20240620 +claude-3-5-sonnet-20241022 +claude-sonnet-4-20250514 +claude-opus-4-20250514 +""" + +ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()] + +# Mapping of model aliases to their canonical names +MODEL_ALIASES = { + # Claude models + "sonnet": "anthropic/claude-sonnet-4-20250514", + "haiku": "claude-3-5-haiku-20241022", + "opus": "claude-opus-4-20250514", + # GPT models + "4": "gpt-4-0613", + "4o": "gpt-4o", + "4-turbo": "gpt-4-1106-preview", + "35turbo": "gpt-3.5-turbo", + "35-turbo": "gpt-3.5-turbo", + "3": "gpt-3.5-turbo", + # Other models + "deepseek": "deepseek/deepseek-chat", + "flash": "gemini/gemini-2.5-flash", + "flash-lite": "gemini/gemini-2.5-flash-lite", + "quasar": "openrouter/openrouter/quasar-alpha", + "r1": "deepseek/deepseek-reasoner", + "gemini-2.5-pro": "gemini/gemini-2.5-pro", + "gemini-3-pro-preview": "gemini/gemini-3-pro-preview", + "gemini": "gemini/gemini-3-pro-preview", + "gemini-exp": "gemini/gemini-2.5-pro-exp-03-25", + "grok3": "xai/grok-3-beta", + "optimus": "openrouter/openrouter/optimus-alpha", +} +# Model metadata loaded from resources and user's files. 
+ + +@dataclass +class ModelSettings: + # Model class needs to have each of these as well + name: str + edit_format: str = "whole" + weak_model_name: Optional[str] = None + use_repo_map: bool = False + send_undo_reply: bool = False + lazy: bool = False + overeager: bool = False + reminder: str = "user" + examples_as_sys_msg: bool = False + extra_params: Optional[dict] = None + cache_control: bool = False + caches_by_default: bool = False + use_system_prompt: bool = True + use_temperature: Union[bool, float] = True + streaming: bool = True + editor_model_name: Optional[str] = None + editor_edit_format: Optional[str] = None + reasoning_tag: Optional[str] = None + remove_reasoning: Optional[str] = None # Deprecated alias for reasoning_tag + system_prompt_prefix: Optional[str] = None + accepts_settings: Optional[list] = None + + +# Load model settings from package resource +MODEL_SETTINGS = [] +with importlib.resources.open_text("aider.resources", "model-settings.yml") as f: + model_settings_list = yaml.safe_load(f) + for model_settings_dict in model_settings_list: + MODEL_SETTINGS.append(ModelSettings(**model_settings_dict)) + + +class ModelInfoManager: + MODEL_INFO_URL = ( + "https://raw.githubusercontent.com/BerriAI/litellm/main/" + "model_prices_and_context_window.json" + ) + CACHE_TTL = 60 * 60 * 24 # 24 hours + + def __init__(self): + self.cache_dir = Path.home() / ".aider" / "caches" + self.cache_file = self.cache_dir / "model_prices_and_context_window.json" + self.content = None + self.local_model_metadata = {} + self.verify_ssl = True + self._cache_loaded = False + + # Manager for the cached OpenRouter model database + self.openrouter_manager = OpenRouterModelManager() + + def set_verify_ssl(self, verify_ssl): + self.verify_ssl = verify_ssl + if hasattr(self, "openrouter_manager"): + self.openrouter_manager.set_verify_ssl(verify_ssl) + + def _load_cache(self): + if self._cache_loaded: + return + + try: + self.cache_dir.mkdir(parents=True, exist_ok=True) + if self.cache_file.exists(): + cache_age = time.time() - self.cache_file.stat().st_mtime + if cache_age < self.CACHE_TTL: + try: + self.content = json.loads(self.cache_file.read_text()) + except json.JSONDecodeError: + # If the cache file is corrupted, treat it as missing + self.content = None + except OSError: + pass + + self._cache_loaded = True + + def _update_cache(self): + try: + import requests + + # Respect the --no-verify-ssl switch + response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl) + if response.status_code == 200: + self.content = response.json() + try: + self.cache_file.write_text(json.dumps(self.content, indent=4)) + except OSError: + pass + except Exception as ex: + print(str(ex)) + try: + # Save empty dict to cache file on failure + self.cache_file.write_text("{}") + except OSError: + pass + + def get_model_from_cached_json_db(self, model): + data = self.local_model_metadata.get(model) + if data: + return data + + # Ensure cache is loaded before checking content + self._load_cache() + + if not self.content: + self._update_cache() + + if not self.content: + return dict() + + info = self.content.get(model, dict()) + if info: + return info + + pieces = model.split("/") + if len(pieces) == 2: + info = self.content.get(pieces[1]) + if info and info.get("litellm_provider") == pieces[0]: + return info + + return dict() + + def get_model_info(self, model): + cached_info = self.get_model_from_cached_json_db(model) + + litellm_info = None + if litellm._lazy_module or not cached_info: + try: + 
litellm_info = litellm.get_model_info(model) + except Exception as ex: + if "model_prices_and_context_window.json" not in str(ex): + print(str(ex)) + + if litellm_info: + return litellm_info + + if not cached_info and model.startswith("openrouter/"): + # First try using the locally cached OpenRouter model database + openrouter_info = self.openrouter_manager.get_model_info(model) + if openrouter_info: + return openrouter_info + + # Fallback to legacy web-scraping if the API cache does not contain the model + openrouter_info = self.fetch_openrouter_model_info(model) + if openrouter_info: + return openrouter_info + + return cached_info + + def fetch_openrouter_model_info(self, model): + """ + Fetch model info by scraping the openrouter model page. + Expected URL: https://openrouter.ai/ + Example: openrouter/qwen/qwen-2.5-72b-instruct:free + Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, + input_cost_per_token, output_cost_per_token. + """ + url_part = model[len("openrouter/") :] + url = "https://openrouter.ai/" + url_part + try: + import requests + + response = requests.get(url, timeout=5, verify=self.verify_ssl) + if response.status_code != 200: + return {} + html = response.text + import re + + if re.search( + rf"The model\s*.*{re.escape(url_part)}.* is not available", html, re.IGNORECASE + ): + print(f"\033[91mError: Model '{url_part}' is not available\033[0m") + return {} + text = re.sub(r"<[^>]+>", " ", html) + context_match = re.search(r"([\d,]+)\s*context", text) + if context_match: + context_str = context_match.group(1).replace(",", "") + context_size = int(context_str) + else: + context_size = None + input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE) + output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE) + input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None + output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None + if context_size is None or input_cost is None or output_cost is None: + return {} + params = { + "max_input_tokens": context_size, + "max_tokens": context_size, + "max_output_tokens": context_size, + "input_cost_per_token": input_cost, + "output_cost_per_token": output_cost, + } + return params + except Exception as e: + print("Error fetching openrouter info:", str(e)) + return {} + + +model_info_manager = ModelInfoManager() + + +class Model(ModelSettings): + def __init__( + self, model, weak_model=None, editor_model=None, editor_edit_format=None, verbose=False + ): + # Map any alias to its canonical name + model = MODEL_ALIASES.get(model, model) + + self.name = model + self.verbose = verbose + + self.max_chat_history_tokens = 1024 + self.weak_model = None + self.editor_model = None + + # Find the extra settings + self.extra_model_settings = next( + (ms for ms in MODEL_SETTINGS if ms.name == "aider/extra_params"), None + ) + + self.info = self.get_model_info(model) + + # Are all needed keys/params available? 
+ res = self.validate_environment() + self.missing_keys = res.get("missing_keys") + self.keys_in_environment = res.get("keys_in_environment") + + max_input_tokens = self.info.get("max_input_tokens") or 0 + # Calculate max_chat_history_tokens as 1/16th of max_input_tokens, + # with minimum 1k and maximum 8k + self.max_chat_history_tokens = min(max(max_input_tokens / 16, 1024), 8192) + + self.configure_model_settings(model) + if weak_model is False: + self.weak_model_name = None + else: + self.get_weak_model(weak_model) + + if editor_model is False: + self.editor_model_name = None + else: + self.get_editor_model(editor_model, editor_edit_format) + + def get_model_info(self, model): + return model_info_manager.get_model_info(model) + + def _copy_fields(self, source): + """Helper to copy fields from a ModelSettings instance to self""" + for field in fields(ModelSettings): + val = getattr(source, field.name) + setattr(self, field.name, val) + + # Handle backward compatibility: if remove_reasoning is set but reasoning_tag isn't, + # use remove_reasoning's value for reasoning_tag + if self.reasoning_tag is None and self.remove_reasoning is not None: + self.reasoning_tag = self.remove_reasoning + + def configure_model_settings(self, model): + # Look for exact model match + exact_match = False + for ms in MODEL_SETTINGS: + # direct match, or match "provider/" + if model == ms.name: + self._copy_fields(ms) + exact_match = True + break # Continue to apply overrides + + # Initialize accepts_settings if it's None + if self.accepts_settings is None: + self.accepts_settings = [] + + model = model.lower() + + # If no exact match, try generic settings + if not exact_match: + self.apply_generic_model_settings(model) + + # Apply override settings last if they exist + if ( + self.extra_model_settings + and self.extra_model_settings.extra_params + and self.extra_model_settings.name == "aider/extra_params" + ): + # Initialize extra_params if it doesn't exist + if not self.extra_params: + self.extra_params = {} + + # Deep merge the extra_params dicts + for key, value in self.extra_model_settings.extra_params.items(): + if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): + # For nested dicts, merge recursively + self.extra_params[key] = {**self.extra_params[key], **value} + else: + # For non-dict values, simply update + self.extra_params[key] = value + + # Ensure OpenRouter models accept thinking_tokens and reasoning_effort + if self.name.startswith("openrouter/"): + if self.accepts_settings is None: + self.accepts_settings = [] + if "thinking_tokens" not in self.accepts_settings: + self.accepts_settings.append("thinking_tokens") + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + + def apply_generic_model_settings(self, model): + if "/o3-mini" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.system_prompt_prefix = "Formatting re-enabled. " + self.system_prompt_prefix = "Formatting re-enabled. 
" + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "gpt-4.1-mini" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = False + return # <-- + + if "gpt-4.1" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = False + return # <-- + + last_segment = model.split("/")[-1] + if last_segment in ("gpt-5", "gpt-5-2025-08-07") or "gpt-5.1" in model: + self.use_temperature = False + self.edit_format = "diff" + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "/o1-mini" in model: + self.use_repo_map = True + self.use_temperature = False + self.use_system_prompt = False + return # <-- + + if "/o1-preview" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.use_system_prompt = False + return # <-- + + if "/o1" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.streaming = False + self.system_prompt_prefix = "Formatting re-enabled. " + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "deepseek" in model and "v3" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = True + return # <-- + + if "deepseek" in model and ("r1" in model or "reasoning" in model): + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.use_temperature = False + self.reasoning_tag = "think" + return # <-- + + if ("llama3" in model or "llama-3" in model) and "70b" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.send_undo_reply = True + self.examples_as_sys_msg = True + return # <-- + + if "gpt-4-turbo" in model or ("gpt-4-" in model and "-preview" in model): + self.edit_format = "udiff" + self.use_repo_map = True + self.send_undo_reply = True + return # <-- + + if "gpt-4" in model or "claude-3-opus" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.send_undo_reply = True + return # <-- + + if "gpt-3.5" in model or "gpt-4" in model: + self.reminder = "sys" + return # <-- + + if "3-7-sonnet" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.reminder = "user" + if "thinking_tokens" not in self.accepts_settings: + self.accepts_settings.append("thinking_tokens") + return # <-- + + if "3.5-sonnet" in model or "3-5-sonnet" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.reminder = "user" + return # <-- + + if model.startswith("o1-") or "/o1-" in model: + self.use_system_prompt = False + self.use_temperature = False + return # <-- + + if ( + "qwen" in model + and "coder" in model + and ("2.5" in model or "2-5" in model) + and "32b" in model + ): + self.edit_format = "diff" + self.editor_edit_format = "editor-diff" + self.use_repo_map = True + return # <-- + + if "qwq" in model and "32b" in model and "preview" not in model: + self.edit_format = "diff" + self.editor_edit_format = "editor-diff" + self.use_repo_map = True + self.reasoning_tag = "think" + self.examples_as_sys_msg = True + self.use_temperature = 0.6 + self.extra_params = dict(top_p=0.95) + return # <-- + + if "qwen3" in model: + self.edit_format = "diff" + 
self.use_repo_map = True + if "235b" in model: + self.system_prompt_prefix = "/no_think" + self.use_temperature = 0.7 + self.extra_params = {"top_p": 0.8, "top_k": 20, "min_p": 0.0} + else: + self.examples_as_sys_msg = True + self.use_temperature = 0.6 + self.reasoning_tag = "think" + self.extra_params = {"top_p": 0.95, "top_k": 20, "min_p": 0.0} + return # <-- + + # use the defaults + if self.edit_format == "diff": + self.use_repo_map = True + return # <-- + + def __str__(self): + return self.name + + def get_weak_model(self, provided_weak_model_name): + # If weak_model_name is provided, override the model settings + if provided_weak_model_name: + self.weak_model_name = provided_weak_model_name + + if not self.weak_model_name: + self.weak_model = self + return + + if self.weak_model_name == self.name: + self.weak_model = self + return + + self.weak_model = Model( + self.weak_model_name, + weak_model=False, + ) + return self.weak_model + + def commit_message_models(self): + return [self.weak_model, self] + + def get_editor_model(self, provided_editor_model_name, editor_edit_format): + # If editor_model_name is provided, override the model settings + if provided_editor_model_name: + self.editor_model_name = provided_editor_model_name + if editor_edit_format: + self.editor_edit_format = editor_edit_format + + if not self.editor_model_name or self.editor_model_name == self.name: + self.editor_model = self + else: + self.editor_model = Model( + self.editor_model_name, + editor_model=False, + ) + + if not self.editor_edit_format: + self.editor_edit_format = self.editor_model.edit_format + if self.editor_edit_format in ("diff", "whole", "diff-fenced"): + self.editor_edit_format = "editor-" + self.editor_edit_format + + return self.editor_model + + def tokenizer(self, text): + return litellm.encode(model=self.name, text=text) + + def token_count(self, messages): + if isinstance(messages, dict): + messages = [messages] + + if isinstance(messages, list): + try: + return litellm.token_counter(model=self.name, messages=messages) + except Exception: + pass # fall back to raw tokenizer + + if not self.tokenizer: + return 0 + + if isinstance(messages, str): + msgs = messages + else: + msgs = json.dumps(messages) + + try: + return len(self.tokenizer(msgs)) + except Exception as err: + print(f"Unable to count tokens with tokenizer: {err}") + return 0 + + def token_count_for_image(self, fname): + """ + Calculate the token cost for an image assuming high detail. + The token cost is determined by the size of the image. + :param fname: The filename of the image. + :return: The token cost for the image. 
+ """ + width, height = self.get_image_size(fname) + + # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 + max_dimension = max(width, height) + if max_dimension > 2048: + scale_factor = 2048 / max_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Scale the image such that the shortest side is 768 pixels long + min_dimension = min(width, height) + scale_factor = 768 / min_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Calculate the number of 512x512 tiles needed to cover the image + tiles_width = math.ceil(width / 512) + tiles_height = math.ceil(height / 512) + num_tiles = tiles_width * tiles_height + + # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens + token_cost = num_tiles * 170 + 85 + return token_cost + + def get_image_size(self, fname): + """ + Retrieve the size of an image. + :param fname: The filename of the image. + :return: A tuple (width, height) representing the image size in pixels. + """ + with Image.open(fname) as img: + return img.size + + def fast_validate_environment(self): + """Fast path for common models. Avoids forcing litellm import.""" + + model = self.name + + pieces = model.split("/") + if len(pieces) > 1: + provider = pieces[0] + else: + provider = None + + keymap = dict( + openrouter="OPENROUTER_API_KEY", + openai="OPENAI_API_KEY", + deepseek="DEEPSEEK_API_KEY", + gemini="GEMINI_API_KEY", + anthropic="ANTHROPIC_API_KEY", + groq="GROQ_API_KEY", + fireworks_ai="FIREWORKS_API_KEY", + ) + var = None + if model in OPENAI_MODELS: + var = "OPENAI_API_KEY" + elif model in ANTHROPIC_MODELS: + var = "ANTHROPIC_API_KEY" + else: + var = keymap.get(provider) + + if var and os.environ.get(var): + return dict(keys_in_environment=[var], missing_keys=[]) + + def validate_environment(self): + res = self.fast_validate_environment() + if res: + return res + + # https://github.com/BerriAI/litellm/issues/3190 + + model = self.name + res = litellm.validate_environment(model) + + # If missing AWS credential keys but AWS_PROFILE is set, consider AWS credentials valid + if res["missing_keys"] and any( + key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] for key in res["missing_keys"] + ): + if model.startswith("bedrock/") or model.startswith("us.anthropic."): + if os.environ.get("AWS_PROFILE"): + res["missing_keys"] = [ + k + for k in res["missing_keys"] + if k not in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] + ] + if not res["missing_keys"]: + res["keys_in_environment"] = True + + if res["keys_in_environment"]: + return res + if res["missing_keys"]: + return res + + provider = self.info.get("litellm_provider", "").lower() + if provider == "cohere_chat": + return validate_variables(["COHERE_API_KEY"]) + if provider == "gemini": + return validate_variables(["GEMINI_API_KEY"]) + if provider == "groq": + return validate_variables(["GROQ_API_KEY"]) + + return res + + def get_repo_map_tokens(self): + map_tokens = 1024 + max_inp_tokens = self.info.get("max_input_tokens") + if max_inp_tokens: + map_tokens = max_inp_tokens / 8 + map_tokens = min(map_tokens, 4096) + map_tokens = max(map_tokens, 1024) + return map_tokens + + def set_reasoning_effort(self, effort): + """Set the reasoning effort parameter for models that support it""" + if effort is not None: + if self.name.startswith("openrouter/"): + if not self.extra_params: + self.extra_params = {} + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + 
self.extra_params["extra_body"]["reasoning"] = {"effort": effort} + else: + if not self.extra_params: + self.extra_params = {} + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + self.extra_params["extra_body"]["reasoning_effort"] = effort + + def parse_token_value(self, value): + """ + Parse a token value string into an integer. + Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. + + Args: + value: String or int token value + + Returns: + Integer token value + """ + if isinstance(value, int): + return value + + if not isinstance(value, str): + return int(value) # Try to convert to int + + value = value.strip().upper() + + if value.endswith("K"): + multiplier = 1024 + value = value[:-1] + elif value.endswith("M"): + multiplier = 1024 * 1024 + value = value[:-1] + else: + multiplier = 1 + + # Convert to float first to handle decimal values like "10.5k" + return int(float(value) * multiplier) + + def set_thinking_tokens(self, value): + """ + Set the thinking token budget for models that support it. + Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. + Pass "0" to disable thinking tokens. + """ + if value is not None: + num_tokens = self.parse_token_value(value) + self.use_temperature = False + if not self.extra_params: + self.extra_params = {} + + # OpenRouter models use 'reasoning' instead of 'thinking' + if self.name.startswith("openrouter/"): + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + if num_tokens > 0: + self.extra_params["extra_body"]["reasoning"] = {"max_tokens": num_tokens} + else: + if "reasoning" in self.extra_params["extra_body"]: + del self.extra_params["extra_body"]["reasoning"] + else: + if num_tokens > 0: + self.extra_params["thinking"] = {"type": "enabled", "budget_tokens": num_tokens} + else: + if "thinking" in self.extra_params: + del self.extra_params["thinking"] + + def get_raw_thinking_tokens(self): + """Get formatted thinking token budget if available""" + budget = None + + if self.extra_params: + # Check for OpenRouter reasoning format + if self.name.startswith("openrouter/"): + if ( + "extra_body" in self.extra_params + and "reasoning" in self.extra_params["extra_body"] + and "max_tokens" in self.extra_params["extra_body"]["reasoning"] + ): + budget = self.extra_params["extra_body"]["reasoning"]["max_tokens"] + # Check for standard thinking format + elif ( + "thinking" in self.extra_params and "budget_tokens" in self.extra_params["thinking"] + ): + budget = self.extra_params["thinking"]["budget_tokens"] + + return budget + + def get_thinking_tokens(self): + budget = self.get_raw_thinking_tokens() + + if budget is not None: + # Format as xx.yK for thousands, xx.yM for millions + if budget >= 1024 * 1024: + value = budget / (1024 * 1024) + if value == int(value): + return f"{int(value)}M" + else: + return f"{value:.1f}M" + else: + value = budget / 1024 + if value == int(value): + return f"{int(value)}k" + else: + return f"{value:.1f}k" + return None + + def get_reasoning_effort(self): + """Get reasoning effort value if available""" + if self.extra_params: + # Check for OpenRouter reasoning format + if self.name.startswith("openrouter/"): + if ( + "extra_body" in self.extra_params + and "reasoning" in self.extra_params["extra_body"] + and "effort" in self.extra_params["extra_body"]["reasoning"] + ): + return self.extra_params["extra_body"]["reasoning"]["effort"] + # Check for standard reasoning_effort format (e.g. 
in extra_body) + elif ( + "extra_body" in self.extra_params + and "reasoning_effort" in self.extra_params["extra_body"] + ): + return self.extra_params["extra_body"]["reasoning_effort"] + return None + + def is_deepseek(self): + name = self.name.lower() + if "deepseek" not in name: + return + return True + + def is_ollama(self): + return self.name.startswith("ollama/") or self.name.startswith("ollama_chat/") + + async def send_completion( + self, messages, functions, stream, temperature=None, tools=None, max_tokens=None + ): + if os.environ.get("AIDER_SANITY_CHECK_TURNS"): + sanity_check_messages(messages) + + messages = model_request_parser(self, messages) + + if self.verbose: + for message in messages: + msg_role = message.get("role") + msg_content = message.get("content") if message.get("content") else "" + msg_trunc = "" + + if message.get("content"): + msg_trunc = message.get("content")[:30] + + print(f"{msg_role} ({len(msg_content)}): {msg_trunc}") + + kwargs = dict(model=self.name, stream=stream) + + if self.use_temperature is not False: + if temperature is None: + if isinstance(self.use_temperature, bool): + temperature = 0 + else: + temperature = float(self.use_temperature) + + kwargs["temperature"] = temperature + + # `tools` is for modern tool usage. `functions` is for legacy/forced calls. + # This handles `base_coder` sending both with same content for `navigator_coder`. + effective_tools = [] + if tools: + effective_tools.extend(tools) + + if functions: + # Convert legacy `functions` to `tools` format and add them + effective_tools.extend([dict(type="function", function=f) for f in functions]) + + if effective_tools: + # Deduplicate tools based on function name + seen_tool_names = set() + deduped_tools = [] + for tool in effective_tools: + tool_name = tool.get("function", {}).get("name") + if tool_name and tool_name not in seen_tool_names: + deduped_tools.append(tool) + seen_tool_names.add(tool_name) + effective_tools = deduped_tools + kwargs["tools"] = effective_tools + + # Forcing a function call is for legacy style `functions` with a single function. + # This is used by ArchitectCoder and not intended for NavigatorCoder's tools. + if functions and len(functions) == 1: + function = functions[0] + + if "name" in function: + tool_name = function.get("name") + if tool_name: + kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_name}} + + if self.extra_params: + kwargs.update(self.extra_params) + + if max_tokens: + kwargs["max_tokens"] = max_tokens + + if "max_tokens" in kwargs and kwargs["max_tokens"]: + kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") + if self.is_ollama() and "num_ctx" not in kwargs: + num_ctx = int(self.token_count(messages) * 1.25) + 8192 + kwargs["num_ctx"] = num_ctx + + key = json.dumps(kwargs, sort_keys=True).encode() + # dump(kwargs) + + hash_object = hashlib.sha1(key) + if "timeout" not in kwargs: + kwargs["timeout"] = request_timeout + if self.verbose: + dump(kwargs) + kwargs["messages"] = messages + + # Cache System Prompts When Possible + kwargs["cache_control_injection_points"] = [ + { + "location": "message", + "role": "system", + }, + ] + + # Are we using github copilot? 
+ if "GITHUB_COPILOT_TOKEN" in os.environ or self.name.startswith("github_copilot/"): + if "extra_headers" not in kwargs: + kwargs["extra_headers"] = { + "Editor-Version": f"aider/{__version__}", + "Copilot-Integration-Id": "vscode-chat", + } + + try: + res = await litellm.acompletion(**kwargs) + except Exception as err: + print(f"LiteLLM API Error: {str(err)}") + res = self.model_error_response() + + if self.verbose: + print(f"LiteLLM API Error: {str(err)}") + raise + + return hash_object, res + + async def simple_send_with_retries(self, messages, max_tokens=None): + from aider.exceptions import LiteLLMExceptions + + litellm_ex = LiteLLMExceptions() + messages = model_request_parser(self, messages) + retry_delay = 0.125 + + if self.verbose: + dump(messages) + + while True: + try: + _hash, response = await self.send_completion( + messages=messages, + functions=None, + stream=False, + max_tokens=max_tokens, + ) + if not response or not hasattr(response, "choices") or not response.choices: + return None + res = response.choices[0].message.content + from aider.reasoning_tags import remove_reasoning_content + + return remove_reasoning_content(res, self.reasoning_tag) + + except litellm_ex.exceptions_tuple() as err: + ex_info = litellm_ex.get_ex_info(err) + print(str(err)) + if ex_info.description: + print(ex_info.description) + should_retry = ex_info.retry + if should_retry: + retry_delay *= 2 + if retry_delay > RETRY_TIMEOUT: + should_retry = False + if not should_retry: + return None + print(f"Retrying in {retry_delay:.1f} seconds...") + time.sleep(retry_delay) + continue + except AttributeError: + return None + + async def model_error_response(self): + for i in range(1): + await asyncio.sleep(0.1) + yield litellm.ModelResponse( + choices=[ + litellm.Choices( + finish_reason="stop", + index=0, + message=litellm.Message( + content="Model API Response Error. Please retry the previous request" + ), # Provide an empty message object + ) + ], + model=self.name, + ) + + +def register_models(model_settings_fnames): + files_loaded = [] + for model_settings_fname in model_settings_fnames: + if not os.path.exists(model_settings_fname): + continue + + if not Path(model_settings_fname).read_text().strip(): + continue + + try: + with open(model_settings_fname, "r") as model_settings_file: + model_settings_list = yaml.safe_load(model_settings_file) + + for model_settings_dict in model_settings_list: + model_settings = ModelSettings(**model_settings_dict) + + # Remove all existing settings for this model name + MODEL_SETTINGS[:] = [ms for ms in MODEL_SETTINGS if ms.name != model_settings.name] + # Add the new settings + MODEL_SETTINGS.append(model_settings) + except Exception as e: + raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") + files_loaded.append(model_settings_fname) + + return files_loaded + + +def register_litellm_models(model_fnames): + files_loaded = [] + for model_fname in model_fnames: + if not os.path.exists(model_fname): + continue + + try: + data = Path(model_fname).read_text() + if not data.strip(): + continue + model_def = json5.loads(data) + if not model_def: + continue + + # Defer registration with litellm to faster path. 
+ model_info_manager.local_model_metadata.update(model_def) + except Exception as e: + raise Exception(f"Error loading model definition from {model_fname}: {e}") + + files_loaded.append(model_fname) + + return files_loaded + + +def validate_variables(vars): + missing = [] + for var in vars: + if var not in os.environ: + missing.append(var) + if missing: + return dict(keys_in_environment=False, missing_keys=missing) + return dict(keys_in_environment=True, missing_keys=missing) + + +async def sanity_check_models(io, main_model): + problem_main = await sanity_check_model(io, main_model) + + problem_weak = None + if main_model.weak_model and main_model.weak_model is not main_model: + problem_weak = await sanity_check_model(io, main_model.weak_model) + + problem_editor = None + if ( + main_model.editor_model + and main_model.editor_model is not main_model + and main_model.editor_model is not main_model.weak_model + ): + problem_editor = await sanity_check_model(io, main_model.editor_model) + + return problem_main or problem_weak or problem_editor + + +async def sanity_check_model(io, model): + show = False + + if model.missing_keys: + show = True + io.tool_warning(f"Warning: {model} expects these environment variables") + for key in model.missing_keys: + value = os.environ.get(key, "") + status = "Set" if value else "Not set" + io.tool_output(f"- {key}: {status}") + + if platform.system() == "Windows": + io.tool_output( + "Note: You may need to restart your terminal or command prompt for `setx` to take" + " effect." + ) + + elif not model.keys_in_environment: + show = True + io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") + + # Check for model-specific dependencies + await check_for_dependencies(io, model.name) + + if not model.info: + show = True + io.tool_warning( + f"Warning for {model}: Unknown context window size and costs, using sane defaults." + ) + + possible_matches = fuzzy_match_models(model.name) + if possible_matches: + io.tool_output("Did you mean one of these?") + for match in possible_matches: + io.tool_output(f"- {match}") + + return show + + +async def check_for_dependencies(io, model_name): + """ + Check for model-specific dependencies and install them if needed. 
+ + Args: + io: The IO object for user interaction + model_name: The name of the model to check dependencies for + """ + # Check if this is a Bedrock model and ensure boto3 is installed + if model_name.startswith("bedrock/"): + await check_pip_install_extra( + io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] + ) + + # Check if this is a Vertex AI model and ensure google-cloud-aiplatform is installed + elif model_name.startswith("vertex_ai/"): + await check_pip_install_extra( + io, + "google.cloud.aiplatform", + "Google Vertex AI models require the google-cloud-aiplatform package.", + ["google-cloud-aiplatform"], + ) + + +def fuzzy_match_models(name): + name = name.lower() + + chat_models = set() + model_metadata = list(litellm.model_cost.items()) + model_metadata += list(model_info_manager.local_model_metadata.items()) + + for orig_model, attrs in model_metadata: + model = orig_model.lower() + if attrs.get("mode") != "chat": + continue + provider = attrs.get("litellm_provider", "").lower() + if not provider: + continue + provider += "/" + + if model.startswith(provider): + fq_model = orig_model + else: + fq_model = provider + orig_model + + chat_models.add(fq_model) + chat_models.add(orig_model) + + chat_models = sorted(chat_models) + # exactly matching model + # matching_models = [ + # (fq,m) for fq,m in chat_models + # if name == fq or name == m + # ] + # if matching_models: + # return matching_models + + # Check for model names containing the name + matching_models = [m for m in chat_models if name in m] + if matching_models: + return sorted(set(matching_models)) + + # Check for slight misspellings + models = set(chat_models) + matching_models = difflib.get_close_matches(name, models, n=3, cutoff=0.8) + + return sorted(set(matching_models)) + + +def print_matching_models(io, search): + matches = fuzzy_match_models(search) + if matches: + io.tool_output(f'Models which match "{search}":') + for model in matches: + io.tool_output(f"- {model}") + else: + io.tool_output(f'No models match "{search}".') + + +def get_model_settings_as_yaml(): + from dataclasses import fields + + import yaml + + model_settings_list = [] + # Add default settings first with all field values + defaults = {} + for field in fields(ModelSettings): + defaults[field.name] = field.default + defaults["name"] = "(default values)" + model_settings_list.append(defaults) + + # Sort model settings by name + for ms in sorted(MODEL_SETTINGS, key=lambda x: x.name): + # Create dict with explicit field order + model_settings_dict = {} + for field in fields(ModelSettings): + value = getattr(ms, field.name) + if value != field.default: + model_settings_dict[field.name] = value + model_settings_list.append(model_settings_dict) + # Add blank line between entries + model_settings_list.append(None) + + # Filter out None values before dumping + yaml_str = yaml.dump( + [ms for ms in model_settings_list if ms is not None], + default_flow_style=False, + sort_keys=False, # Preserve field order from dataclass + ) + # Add actual blank lines between entries + return yaml_str.replace("\n- ", "\n\n- ") + + +def main(): + if len(sys.argv) < 2: + print("Usage: python models.py or python models.py --yaml") + sys.exit(1) + + if sys.argv[1] == "--yaml": + yaml_string = get_model_settings_as_yaml() + print(yaml_string) + else: + model_name = sys.argv[1] + matching_models = fuzzy_match_models(model_name) + + if matching_models: + print(f"Matching models for '{model_name}':") + for model in matching_models: + print(model) + else: + 
print(f"No matching models found for '{model_name}'.") + + +if __name__ == "__main__": + main() diff --git a/tests/basic/test_exceptions.py b/tests/basic/test_exceptions.py index 821bf249956..f19d758c723 100644 --- a/tests/basic/test_exceptions.py +++ b/tests/basic/test_exceptions.py @@ -1,97 +1,97 @@ -from aider.exceptions import ExInfo, LiteLLMExceptions - - -def test_litellm_exceptions_load(): - """Test that LiteLLMExceptions loads without errors""" - ex = LiteLLMExceptions() - assert len(ex.exceptions) > 0 - - -def test_exceptions_tuple(): - """Test that exceptions_tuple returns a non-empty tuple""" - ex = LiteLLMExceptions() - assert isinstance(ex.exceptions_tuple(), tuple) - assert len(ex.exceptions_tuple()) > 0 - - -def test_get_ex_info(): - """Test get_ex_info returns correct ExInfo""" - ex = LiteLLMExceptions() - - # Test with a known exception type - from litellm import AuthenticationError - - auth_error = AuthenticationError( - message="Invalid API key", llm_provider="openai", model="gpt-4" - ) - ex_info = ex.get_ex_info(auth_error) - assert isinstance(ex_info, ExInfo) - assert ex_info.name == "AuthenticationError" - assert ex_info.retry is False - assert "API key" in ex_info.description - - # Test with unknown exception type - class UnknownError(Exception): - pass - - unknown = UnknownError() - ex_info = ex.get_ex_info(unknown) - assert isinstance(ex_info, ExInfo) - assert ex_info.name is None - assert ex_info.retry is None - assert ex_info.description is None - - -def test_rate_limit_error(): - """Test specific handling of RateLimitError""" - ex = LiteLLMExceptions() - from litellm import RateLimitError - - rate_error = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-4") - ex_info = ex.get_ex_info(rate_error) - assert ex_info.retry is True - assert "rate limited" in ex_info.description.lower() - - -def test_bad_gateway_error(): - """Test specific handling of BadGatewayError""" - ex = LiteLLMExceptions() - from litellm import BadGatewayError - - bad_gateway_error = BadGatewayError( - message="Bad Gateway", llm_provider="openai", model="gpt-4" - ) - ex_info = ex.get_ex_info(bad_gateway_error) - assert ex_info.retry is True - assert ex_info.name == "BadGatewayError" - - -def test_context_window_error(): - """Test specific handling of ContextWindowExceededError""" - ex = LiteLLMExceptions() - from litellm import ContextWindowExceededError - - ctx_error = ContextWindowExceededError( - message="Context length exceeded", model="gpt-4", llm_provider="openai" - ) - ex_info = ex.get_ex_info(ctx_error) - assert ex_info.retry is False - - -def test_openrouter_error(): - """Test specific handling of OpenRouter API errors""" - ex = LiteLLMExceptions() - from litellm import APIConnectionError - - # Create an APIConnectionError with OpenrouterException message - openrouter_error = APIConnectionError( - message="APIConnectionError: OpenrouterException - 'choices'", - model="openrouter/model", - llm_provider="openrouter", - ) - - ex_info = ex.get_ex_info(openrouter_error) - assert ex_info.retry is True - assert "OpenRouter" in ex_info.description - assert "overloaded" in ex_info.description - assert "rate" in ex_info.description +from aider.exceptions import ExInfo, LiteLLMExceptions + + +def test_litellm_exceptions_load(): + """Test that LiteLLMExceptions loads without errors""" + ex = LiteLLMExceptions() + assert len(ex.exceptions) > 0 + + +def test_exceptions_tuple(): + """Test that exceptions_tuple returns a non-empty tuple""" + ex = LiteLLMExceptions() + 
assert isinstance(ex.exceptions_tuple(), tuple) + assert len(ex.exceptions_tuple()) > 0 + + +def test_get_ex_info(): + """Test get_ex_info returns correct ExInfo""" + ex = LiteLLMExceptions() + + # Test with a known exception type + from litellm import AuthenticationError + + auth_error = AuthenticationError( + message="Invalid API key", llm_provider="openai", model="gpt-4" + ) + ex_info = ex.get_ex_info(auth_error) + assert isinstance(ex_info, ExInfo) + assert ex_info.name == "AuthenticationError" + assert ex_info.retry is False + assert "API key" in ex_info.description + + # Test with unknown exception type + class UnknownError(Exception): + pass + + unknown = UnknownError() + ex_info = ex.get_ex_info(unknown) + assert isinstance(ex_info, ExInfo) + assert ex_info.name is None + assert ex_info.retry is None + assert ex_info.description is None + + +def test_rate_limit_error(): + """Test specific handling of RateLimitError""" + ex = LiteLLMExceptions() + from litellm import RateLimitError + + rate_error = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-4") + ex_info = ex.get_ex_info(rate_error) + assert ex_info.retry is True + assert "rate limited" in ex_info.description.lower() + + +def test_bad_gateway_error(): + """Test specific handling of BadGatewayError""" + ex = LiteLLMExceptions() + from litellm import BadGatewayError + + bad_gateway_error = BadGatewayError( + message="Bad Gateway", llm_provider="openai", model="gpt-4" + ) + ex_info = ex.get_ex_info(bad_gateway_error) + assert ex_info.retry is True + assert ex_info.name == "BadGatewayError" + + +def test_context_window_error(): + """Test specific handling of ContextWindowExceededError""" + ex = LiteLLMExceptions() + from litellm import ContextWindowExceededError + + ctx_error = ContextWindowExceededError( + message="Context length exceeded", model="gpt-4", llm_provider="openai" + ) + ex_info = ex.get_ex_info(ctx_error) + assert ex_info.retry is False + + +def test_openrouter_error(): + """Test specific handling of OpenRouter API errors""" + ex = LiteLLMExceptions() + from litellm import APIConnectionError + + # Create an APIConnectionError with OpenrouterException message + openrouter_error = APIConnectionError( + message="APIConnectionError: OpenrouterException - 'choices'", + model="openrouter/model", + llm_provider="openrouter", + ) + + ex_info = ex.get_ex_info(openrouter_error) + assert ex_info.retry is True + assert "OpenRouter" in ex_info.description + assert "overloaded" in ex_info.description + assert "rate" in ex_info.description From 6ed6be0bff79f645c335f35f02898470f7844791 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 16:55:41 -0800 Subject: [PATCH 09/17] removed fix line ending script --- fix_line_endings.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 fix_line_endings.py diff --git a/fix_line_endings.py b/fix_line_endings.py deleted file mode 100644 index 6410824268a..00000000000 --- a/fix_line_endings.py +++ /dev/null @@ -1,26 +0,0 @@ -import sys - -def fix_line_endings(file_path): - """ - Converts the line endings of a file from CRLF to LF. 
- """ - try: - with open(file_path, 'r', newline='', encoding='utf-8') as f: - content = f.read() - - with open(file_path, 'w', newline='\n', encoding='utf-8') as f: - f.write(content) - - print(f"Successfully converted line endings for: {file_path}") - - except Exception as e: - print(f"Error processing file {file_path}: {e}", file=sys.stderr) - sys.exit(1) - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python fix_line_endings.py ", file=sys.stderr) - sys.exit(1) - - file_to_fix = sys.argv[1] - fix_line_endings(file_to_fix) From 76778c6cf944947f62a3f115e10ca8995bfae1dd Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Wed, 26 Nov 2025 23:24:37 -0500 Subject: [PATCH 10/17] Bump Version --- aider/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/__init__.py b/aider/__init__.py index 8ae2fd4d7c3..4c3f26c50bc 100644 --- a/aider/__init__.py +++ b/aider/__init__.py @@ -1,6 +1,6 @@ from packaging import version -__version__ = "0.88.30.dev" +__version__ = "0.88.31.dev" safe_version = __version__ try: From cd92a2c3a5491419814b45bc281918117adc4f07 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Wed, 26 Nov 2025 23:34:57 -0500 Subject: [PATCH 11/17] Add gitattributes --- .gitattributes | 1 + .gitignore | 1 + 2 files changed, 2 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..94f480de94e --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf \ No newline at end of file diff --git a/.gitignore b/.gitignore index d2cffc639ff..6a8fe65642b 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ # Specific Files !/.dockerignore !/.flake8 +!/.gitattributes !/.gitignore !/.pre-commit-config.yaml !/CHANGELOG.md From 951257ea531a9f7b7efa093cde55193ba735cae2 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Wed, 26 Nov 2025 23:43:03 -0500 Subject: [PATCH 12/17] Allow retries on BadGatewayErrors --- aider/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/exceptions.py b/aider/exceptions.py index 5fb84d992c6..a151f504150 100644 --- a/aider/exceptions.py +++ b/aider/exceptions.py @@ -20,7 +20,7 @@ class ExInfo: "The API provider is not able to authenticate you. 
Check your API key.", ), ExInfo("AzureOpenAIError", True, None), - ExInfo("BadGatewayError", False, None), + ExInfo("BadGatewayError", True, None), ExInfo("BadRequestError", False, None), ExInfo("BudgetExceededError", True, None), ExInfo( From 0cb6631793dac8486ab1588f35dc4018d7dc51f9 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Thu, 27 Nov 2025 00:55:20 -0500 Subject: [PATCH 13/17] Add context block configuration for more control over message sizes --- aider/coders/agent_coder.py | 81 ++++++++++++++++--------- aider/website/docs/config/agent-mode.md | 25 +++++++- 2 files changed, 74 insertions(+), 32 deletions(-) diff --git a/aider/coders/agent_coder.py b/aider/coders/agent_coder.py index cf61d2445c2..f0ae593ae44 100644 --- a/aider/coders/agent_coder.py +++ b/aider/coders/agent_coder.py @@ -135,6 +135,7 @@ def __init__(self, *args, **kwargs): # Initialize empty token tracking dictionary and cache structures # but don't populate yet to avoid startup delay + self.allowed_context_blocks = set() self.context_block_tokens = {} self.context_blocks_cache = {} self.tokens_calculated = False @@ -257,6 +258,25 @@ def _get_agent_config(self): if "tools_excludelist" not in config: config["tools_excludelist"] = [] + if "include_context_blocks" in config: + self.allowed_context_blocks = set(config["context_blocks"]) + else: + self.allowed_context_blocks = { + "context_summary", + "directory_structure", + "environment_info", + "git_status", + "symbol_outline", + "todo_list", + } + + if "exclude_context_blocks" in config: + for context_block in config["exclude_context_blocks"]: + try: + self.allowed_context_blocks.remove(context_block) + except KeyError: + pass + # Apply configuration to instance self.large_file_token_threshold = config["large_file_token_threshold"] self.skip_cli_confirmations = config.get( @@ -468,11 +488,12 @@ def _calculate_context_block_tokens(self, force=False): ] for block_type in block_types: - block_content = self._generate_context_block(block_type) - if block_content: - self.context_block_tokens[block_type] = self.main_model.token_count( - block_content - ) + if block_type in self.allowed_context_blocks: + block_content = self._generate_context_block(block_type) + if block_content: + self.context_block_tokens[block_type] = self.main_model.token_count( + block_content + ) # Mark as calculated self.tokens_calculated = True @@ -670,12 +691,25 @@ def format_chat_chunks(self): chunks.examples = example_messages self.summarize_end() - chunks.done = list(self.done_messages) - chunks.repo = self.get_repo_messages() chunks.readonly_files = self.get_readonly_files_messages() + chunks.repo = self.get_repo_messages() + chunks.done = list(self.done_messages) chunks.chat_files = self.get_chat_files_messages() + # Add reminder if needed + if self.gpt_prompts.system_reminder: + reminder_message = [ + dict( + role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder) + ), + ] + else: + reminder_message = [] + + chunks.cur = list(self.cur_messages) + chunks.reminder = [] + # Make sure token counts are updated - using centralized method # This also populates the context block cache self._calculate_context_block_tokens() @@ -693,9 +727,9 @@ def format_chat_chunks(self): # 1. 
Add relatively static blocks BEFORE done_messages # These blocks change less frequently and can be part of the cacheable prefix static_blocks = [] - if dir_structure: + if dir_structure and "directory_structure" in self.allowed_context_blocks: static_blocks.append(dir_structure) - if env_context: + if env_context and "environment_info" in self.allowed_context_blocks: static_blocks.append(env_context) if static_blocks: @@ -706,13 +740,13 @@ def format_chat_chunks(self): # 2. Add dynamic blocks AFTER chat_files # These blocks change with the current files in context dynamic_blocks = [] - if todo_list: + if todo_list and "todo_list" in self.allowed_context_blocks: dynamic_blocks.append(todo_list) - if context_summary: + if context_summary and "context_summary" in self.allowed_context_blocks: dynamic_blocks.append(context_summary) - if symbol_outline: + if symbol_outline and "symbol_outline" in self.allowed_context_blocks: dynamic_blocks.append(symbol_outline) - if git_status: + if git_status and "git_status" in self.allowed_context_blocks: dynamic_blocks.append(git_status) # Add tool usage context if there are repetitive tools @@ -725,21 +759,8 @@ def format_chat_chunks(self): if dynamic_blocks: dynamic_message = "\n\n".join(dynamic_blocks) - # Append as a system message after chat_files - chunks.chat_files.append(dict(role="system", content=dynamic_message)) - - # Add reminder if needed - if self.gpt_prompts.system_reminder: - reminder_message = [ - dict( - role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder) - ), - ] - else: - reminder_message = [] - - chunks.cur = list(self.cur_messages) - chunks.reminder = [] + # Append as a system message on reminders + reminder_message.insert(0, dict(role="system", content=dynamic_message)) # Use accurate token counting method that considers enhanced context blocks base_messages = chunks.all_messages() @@ -1654,7 +1675,9 @@ def _generate_tool_context(self, repetitive_tools): for tool in repetitive_tools: context_parts.append(f"- `{tool}`") context_parts.append( - "Your exploration appears to be stuck in a loop. Please try a different approach:" + "Your exploration appears to be stuck in a loop. Please try a different approach." + " Use the `Thinking` tool to clarify your intentions and new approach to" + " what you are currently attempting to accomplish." 
) context_parts.append("\n") context_parts.append("**Suggestions for alternative approaches:**") diff --git a/aider/website/docs/config/agent-mode.md b/aider/website/docs/config/agent-mode.md index 56991db409d..ca9f0d8d039 100644 --- a/aider/website/docs/config/agent-mode.md +++ b/aider/website/docs/config/agent-mode.md @@ -154,6 +154,8 @@ Agent Mode can be configured using the `--agent-config` command line argument, w - **`skip_cli_confirmations`**: YOLO mode, be brave and let the LLM cook, can also use the option `yolo` (default: False) - **`tools_includelist`**: Array of tool names to allow (only these tools will be available) - **`tools_excludelist`**: Array of tool names to exclude (these tools will be disabled) +- **`include_context_blocks`**: Array of context block names to include (overrides default set) +- **`exclude_context_blocks`**: Array of context block names to exclude from default set #### Essential Tools @@ -164,6 +166,18 @@ Certain tools are always available regardless of includelist/excludelist setting - `view` - View files - `finished` - Complete the task +#### Context Blocks + +The following context blocks are available by default and can be customized using `include_context_blocks` and `exclude_context_blocks`: + +- **`context_summary`**: Shows current context usage and token limits +- **`directory_structure`**: Displays the project's file structure +- **`git_status`**: Shows current git branch, status, and recent commits +- **`symbol_outline`**: Lists classes, functions, and methods in current context +- **`todo_list`**: Shows the current todo list managed via `UpdateTodoList` tool + +When `include_context_blocks` is specified, only the listed blocks will be included. When `exclude_context_blocks` is specified, the listed blocks will be removed from the default set. + #### Other Aider-CE CLI/Config Options for Agent Mode - `preserve-todo-list` - Preserve todo list across sessions @@ -187,8 +201,14 @@ aider-ce --agent --agent-config '{"tools_excludelist": ["command", "commandinter # Custom large file threshold aider-ce --agent --agent-config '{"large_file_token_threshold": 10000}' +# Custom context blocks configuration +aider-ce --agent --agent-config '{"include_context_blocks": ["directory_structure", "git_status"]}' + +# Exclude specific context blocks +aider-ce --agent --agent-config '{"exclude_context_blocks": ["symbol_outline", "todo_list"]}' + # Combined configuration -aider-ce --agent --agent-config '{"large_file_token_threshold": 10000, "tools_includelist": ["view", "makeeditable", "replacetext", "finished", "gitdiff"]}' +aider-ce --agent --agent-config '{"large_file_token_threshold": 10000, "tools_includelist": ["view", "makeeditable", "replacetext", "finished", "gitdiff"], "include_context_blocks": ["directory_structure", "git_status"]}' # Command Line Options aider-ce --agent --agent-config '{"large_file_token_threshold": 10000, "tools_includelist": ["view", "makeeditable", "replacetext", "finished", "gitdiff"]}' --preserve-todo-list --use-enhanced-map @@ -204,5 +224,4 @@ This configuration system allows for fine-grained control over which tools are a - **Scalable exploration**: Can handle large codebases through strategic context management - **Recovery mechanisms**: Built-in undo and safety features -Agent Mode represents a significant evolution in aider's capabilities, enabling more sophisticated and autonomous codebase manipulation while maintaining safety and control through the tool-based architecture. 
- +Agent Mode represents a significant evolution in aider's capabilities, enabling more sophisticated and autonomous codebase manipulation while maintaining safety and control through the tool-based architecture. \ No newline at end of file From a3dfc86763cb6769c0c84cc650013a41ed2e4106 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Thu, 27 Nov 2025 00:55:57 -0500 Subject: [PATCH 14/17] Fix formatting --- tests/basic/test_exceptions.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/basic/test_exceptions.py b/tests/basic/test_exceptions.py index f19d758c723..6025a9cec53 100644 --- a/tests/basic/test_exceptions.py +++ b/tests/basic/test_exceptions.py @@ -58,9 +58,7 @@ def test_bad_gateway_error(): ex = LiteLLMExceptions() from litellm import BadGatewayError - bad_gateway_error = BadGatewayError( - message="Bad Gateway", llm_provider="openai", model="gpt-4" - ) + bad_gateway_error = BadGatewayError(message="Bad Gateway", llm_provider="openai", model="gpt-4") ex_info = ex.get_ex_info(bad_gateway_error) assert ex_info.retry is True assert ex_info.name == "BadGatewayError" From d74ee434cbee97cdd657eb599a5bb2ec29021dab Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Thu, 27 Nov 2025 02:28:32 -0500 Subject: [PATCH 15/17] Add similarity lookups a hedge against repetitious tool calls --- aider/coders/agent_coder.py | 145 ++++++++++++++++++++++++++++-------- 1 file changed, 113 insertions(+), 32 deletions(-) diff --git a/aider/coders/agent_coder.py b/aider/coders/agent_coder.py index f0ae593ae44..73bec1fa156 100644 --- a/aider/coders/agent_coder.py +++ b/aider/coders/agent_coder.py @@ -20,6 +20,13 @@ # Import the change tracker from aider.change_tracker import ChangeTracker + +# Import similarity functions for tool usage analysis +from aider.helpers.similarity import ( + cosine_similarity, + create_bigram_vector, + normalize_vector, +) from aider.mcp.server import LocalServer from aider.repo import ANY_GIT_ERROR @@ -79,8 +86,15 @@ def __init__(self, *args, **kwargs): self.recently_removed = {} # Tool usage history - self.tool_usage_history = [] + self.tool_usage_history = [] # Stores lists of tools used in each round self.tool_usage_retries = 10 + self.last_round_tools = [] # Tools used in the current round + + # Similarity tracking for tool usage + self.tool_call_vectors = [] # Store vectors for individual tool calls + self.tool_similarity_threshold = 0.99 # High threshold for exact matches + self.max_tool_vector_history = 10 # Keep history of 10 rounds + self.read_tools = { "viewfilesatglob", "viewfilesmatching", @@ -102,7 +116,7 @@ def __init__(self, *args, **kwargs): } # Configuration parameters - self.max_tool_calls = 100 # Maximum number of tool calls per response + self.max_tool_calls = 10000 # Maximum number of tool calls per response # Context management parameters # Will be overridden by agent_config if provided @@ -693,9 +707,9 @@ def format_chat_chunks(self): self.summarize_end() chunks.readonly_files = self.get_readonly_files_messages() + chunks.chat_files = self.get_chat_files_messages() chunks.repo = self.get_repo_messages() chunks.done = list(self.done_messages) - chunks.chat_files = self.get_chat_files_messages() # Add reminder if needed if self.gpt_prompts.system_reminder: @@ -727,38 +741,44 @@ def format_chat_chunks(self): # 1. 
Add relatively static blocks BEFORE done_messages # These blocks change less frequently and can be part of the cacheable prefix static_blocks = [] - if dir_structure and "directory_structure" in self.allowed_context_blocks: - static_blocks.append(dir_structure) if env_context and "environment_info" in self.allowed_context_blocks: static_blocks.append(env_context) + if dir_structure and "directory_structure" in self.allowed_context_blocks: + static_blocks.append(dir_structure) if static_blocks: static_message = "\n\n".join(static_blocks) # Insert as a system message right before done_messages - chunks.done.insert(0, dict(role="system", content=static_message)) + chunks.system.append(dict(role="system", content=static_message)) # 2. Add dynamic blocks AFTER chat_files # These blocks change with the current files in context - dynamic_blocks = [] - if todo_list and "todo_list" in self.allowed_context_blocks: - dynamic_blocks.append(todo_list) + pre_dynamic_blocks = [] + post_dynamic_blocks = [] if context_summary and "context_summary" in self.allowed_context_blocks: - dynamic_blocks.append(context_summary) + pre_dynamic_blocks.append(context_summary) if symbol_outline and "symbol_outline" in self.allowed_context_blocks: - dynamic_blocks.append(symbol_outline) + pre_dynamic_blocks.append(symbol_outline) if git_status and "git_status" in self.allowed_context_blocks: - dynamic_blocks.append(git_status) + pre_dynamic_blocks.append(git_status) + if todo_list and "todo_list" in self.allowed_context_blocks: + post_dynamic_blocks.append(todo_list) # Add tool usage context if there are repetitive tools if hasattr(self, "tool_usage_history") and self.tool_usage_history: repetitive_tools = self._get_repetitive_tools() if repetitive_tools: tool_context = self._generate_tool_context(repetitive_tools) if tool_context: - dynamic_blocks.append(tool_context) + post_dynamic_blocks.append(tool_context) - if dynamic_blocks: - dynamic_message = "\n\n".join(dynamic_blocks) + if pre_dynamic_blocks: + dynamic_message = "\n\n".join(pre_dynamic_blocks) + # Append as a system message on reminders + chunks.done.insert(0, dict(role="system", content=dynamic_message)) + + if post_dynamic_blocks: + dynamic_message = "\n\n".join(post_dynamic_blocks) # Append as a system message on reminders reminder_message.insert(0, dict(role="system", content=dynamic_message)) @@ -972,13 +992,34 @@ async def process_tool_calls(self, tool_call_response): self.agent_finished = False await self.auto_save_session() + # Clear last round tools and start tracking new round + self.last_round_tools = [] + if self.partial_response_tool_calls: for tool_call in self.partial_response_tool_calls: - self.tool_usage_history.append(tool_call.get("function", {}).get("name")) + tool_name = tool_call.get("function", {}).get("name") + self.last_round_tools.append(tool_name) + + # Create and store vector for this tool call + # Remove id property if present before stringifying + tool_call_copy = tool_call.copy() + if "id" in tool_call_copy: + del tool_call_copy["id"] + tool_call_str = str(tool_call_copy) # Convert entire tool call to string + tool_vector = create_bigram_vector((tool_call_str,)) + tool_vector_norm = normalize_vector(tool_vector) + self.tool_call_vectors.append(tool_vector_norm) + + # Add the completed round to history + if self.last_round_tools: + self.tool_usage_history += self.last_round_tools if len(self.tool_usage_history) > self.tool_usage_retries: self.tool_usage_history.pop(0) + if len(self.tool_call_vectors) > 
self.max_tool_vector_history: + self.tool_call_vectors.pop(0) + return await super().process_tool_calls(tool_call_response) async def reply_completed(self): @@ -1595,13 +1636,14 @@ async def _process_tool_commands(self, content): def _get_repetitive_tools(self): """ - Identifies repetitive tool usage patterns from a flat list of tool calls. + Identifies repetitive tool usage patterns from rounds of tool calls. - This method checks for the following patterns in order: - 1. If the last tool used was a write tool, it assumes progress and returns no repetitive tools. - 2. It checks for any read tool that has been used 2 or more times in the history. + This method combines count-based and similarity-based detection: + 1. If the last round contained a write tool, it assumes progress and returns no repetitive tools. + 2. It checks for any read tool that has been used 2 or more times across rounds. 3. If no tools are repeated, but all tools in the history are read tools, it flags all of them as potentially repetitive. + 4. It checks for similarity-based repetition using cosine similarity on tool call strings. It avoids flagging repetition if a "write" tool was used recently, as that suggests progress is being made. @@ -1612,31 +1654,71 @@ def _get_repetitive_tools(self): if history_len < 2: return set() - # If the last tool was a write tool, we're likely making progress. - if isinstance(self.tool_usage_history[-1], str): - last_tool_lower = self.tool_usage_history[-1].lower() + # Check for similarity-based repetition + similarity_repetitive_tools = self._get_repetitive_tools_by_similarity() - if last_tool_lower in self.write_tools: + # Flatten the tool usage history for count-based analysis + all_tools = [] + for round_tools in self.tool_usage_history: + all_tools.extend(round_tools) + + # If the last round contained a write tool, we're likely making progress. + if self.last_round_tools: + last_round_has_write = any( + tool.lower() in self.write_tools for tool in self.last_round_tools + ) + if last_round_has_write: self.tool_usage_history = [] - return set() + return similarity_repetitive_tools if len(similarity_repetitive_tools) else set() # If all tools in history are read tools, return all of them - if all(tool.lower() in self.read_tools for tool in self.tool_usage_history): - return set(tool for tool in self.tool_usage_history) + if all(tool.lower() in self.read_tools for tool in all_tools): + return set(all_tools) - # Check for any read tool used more than once - tool_counts = Counter(tool for tool in self.tool_usage_history) - repetitive_tools = { + # Check for any read tool used more than once across rounds + tool_counts = Counter(all_tools) + count_repetitive_tools = { tool for tool, count in tool_counts.items() if count >= 2 and tool.lower() in self.read_tools } + # Combine both detection methods + repetitive_tools = count_repetitive_tools.union(similarity_repetitive_tools) + if repetitive_tools: return repetitive_tools return set() + def _get_repetitive_tools_by_similarity(self): + """ + Identifies repetitive tool usage patterns using cosine similarity on tool call strings. + + This method checks if the latest tool calls are highly similar (>0.99 threshold) + to historical tool calls using bigram vector similarity. 
+ + Returns: + set: Set of tool names that are repetitive based on similarity + """ + if not self.tool_usage_history or len(self.tool_call_vectors) < 2: + return set() + + # Get the latest tool call vector + latest_vector = self.tool_call_vectors[-1] + + # Check similarity against historical vectors (excluding the latest) + for i, historical_vector in enumerate(self.tool_call_vectors[:-1]): + similarity = cosine_similarity(latest_vector, historical_vector) + + # If similarity is high enough, flag as repetitive + if similarity >= self.tool_similarity_threshold: + # Return the tool name from the corresponding position in history + if i < len(self.tool_usage_history): + return {self.tool_usage_history[i]} + + return set() + def _generate_tool_context(self, repetitive_tools): """ Generate a context message for the LLM about recent tool usage. @@ -1649,8 +1731,7 @@ def _generate_tool_context(self, repetitive_tools): # Add turn and tool call statistics context_parts.append("## Turn and Tool Call Statistics") context_parts.append(f"- Current turn: {self.num_reflections + 1}") - context_parts.append(f"- Tool calls this turn: {self.tool_call_count}") - context_parts.append(f"- Total tool calls in session: {self.num_tool_calls}") + context_parts.append(f"- Total tool calls this turn: {self.num_tool_calls}") context_parts.append("\n\n") # Add recent tool usage history From 7436d4532c364300f88874fda0acffedad820b7f Mon Sep 17 00:00:00 2001 From: burnettk Date: Thu, 27 Nov 2025 11:53:36 -0500 Subject: [PATCH 16/17] update text to aider-ce when it asks you to re-run --- aider/versioncheck.py | 2 +- tests/fixtures/chat-history-search-replace-gold.txt | 4 ++-- tests/fixtures/chat-history.md | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aider/versioncheck.py b/aider/versioncheck.py index 7c0a73a2f42..68aac2b28a6 100644 --- a/aider/versioncheck.py +++ b/aider/versioncheck.py @@ -55,7 +55,7 @@ async def install_upgrade(io, latest_version=None): ) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() return diff --git a/tests/fixtures/chat-history-search-replace-gold.txt b/tests/fixtures/chat-history-search-replace-gold.txt index de28f77fac2..9947ab3800b 100644 --- a/tests/fixtures/chat-history-search-replace-gold.txt +++ b/tests/fixtures/chat-history-search-replace-gold.txt @@ -2204,7 +2204,7 @@ Newer aider version v{latest_version} is available. To upgrade, run: if io.confirm_ask("Run pip install?"): success, output = utils.run_install(cmd) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() else: io.tool_error(output) @@ -2286,7 +2286,7 @@ Newer aider version v{latest_version} is available. To upgrade, run: if io.confirm_ask("Run pip install?"): success, output = utils.run_install(cmd) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() else: io.tool_error(output) diff --git a/tests/fixtures/chat-history.md b/tests/fixtures/chat-history.md index fdf4fd8202f..ae7a11113aa 100644 --- a/tests/fixtures/chat-history.md +++ b/tests/fixtures/chat-history.md @@ -6465,7 +6465,7 @@ Newer aider version v{latest_version} is available. 
To upgrade, run: if io.confirm_ask("Run pip install?"): success, output = utils.run_install(cmd) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() else: io.tool_error(output) @@ -6547,7 +6547,7 @@ Newer aider version v{latest_version} is available. To upgrade, run: if io.confirm_ask("Run pip install?"): success, output = utils.run_install(cmd) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() else: io.tool_error(output) From c3e05846e811fe7a2cd357e59f74711b26cba1c1 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Thu, 27 Nov 2025 14:04:57 -0500 Subject: [PATCH 17/17] Add multiple changes to improve agent mode task horizon length and cache efficiency: - Sort editiable and readonly files in chat by last edit time on the premise that the models will attempt to edit one file at a time in most cases - Re-order message history sections for agent coder - Split cur messages in to 2 blocks some of which sort before editiable files and some of which after - Remove Search/Replace information from system prompt since agent mode does not use it - Increase amount of turns the agent is allowed to take from 100 to 10000 --- aider/coders/agent_coder.py | 31 ++++++++++++++++++++++++++----- aider/coders/agent_prompts.py | 14 +------------- aider/coders/base_coder.py | 17 ++++++++++++++--- aider/coders/chat_chunks.py | 33 +++++++++++++++++++++++---------- 4 files changed, 64 insertions(+), 31 deletions(-) diff --git a/aider/coders/agent_coder.py b/aider/coders/agent_coder.py index 73bec1fa156..c70f28e1f00 100644 --- a/aider/coders/agent_coder.py +++ b/aider/coders/agent_coder.py @@ -690,7 +690,18 @@ def format_chat_chunks(self): if self.gpt_prompts.system_reminder: main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder) - chunks = ChatChunks() + chunks = ChatChunks( + chunk_ordering=[ + "system", + "examples", + "readonly_files", + "repo", + "done", + "chat_files", + "cur", + "reminder", + ] + ) if self.main_model.use_system_prompt: chunks.system = [ @@ -705,11 +716,21 @@ def format_chat_chunks(self): chunks.examples = example_messages self.summarize_end() + cur_messages_list = list(self.cur_messages) + cur_messages_pre = [] + cur_messages_post = cur_messages_list + + if len(cur_messages_list) > 32: + divider = len(cur_messages_list) % 32 + if divider: + divider = -1 * divider + cur_messages_pre = cur_messages_list[:divider] + cur_messages_post = cur_messages_list[divider:] chunks.readonly_files = self.get_readonly_files_messages() chunks.chat_files = self.get_chat_files_messages() chunks.repo = self.get_repo_messages() - chunks.done = list(self.done_messages) + chunks.done = list(self.done_messages) + cur_messages_pre # Add reminder if needed if self.gpt_prompts.system_reminder: @@ -721,7 +742,7 @@ def format_chat_chunks(self): else: reminder_message = [] - chunks.cur = list(self.cur_messages) + chunks.cur = cur_messages_post chunks.reminder = [] # Make sure token counts are updated - using centralized method @@ -763,14 +784,14 @@ def format_chat_chunks(self): pre_dynamic_blocks.append(git_status) if todo_list and "todo_list" in self.allowed_context_blocks: - post_dynamic_blocks.append(todo_list) + pre_dynamic_blocks.append(todo_list) # Add tool usage context if there are repetitive tools if hasattr(self, "tool_usage_history") and self.tool_usage_history: repetitive_tools = self._get_repetitive_tools() if 
repetitive_tools: tool_context = self._generate_tool_context(repetitive_tools) if tool_context: - post_dynamic_blocks.append(tool_context) + pre_dynamic_blocks.append(tool_context) if pre_dynamic_blocks: dynamic_message = "\n\n".join(pre_dynamic_blocks) diff --git a/aider/coders/agent_prompts.py b/aider/coders/agent_prompts.py index 237dcb0aa3a..d843780d782 100644 --- a/aider/coders/agent_prompts.py +++ b/aider/coders/agent_prompts.py @@ -49,17 +49,6 @@ class AgentPrompts(CoderPrompts): 1. **Turn 1**: Use `ShowNumberedContext` to get the exact, current line numbers. 2. **Turn 2**: In your *next* message, use the line-based editing tool (`ReplaceLines`, etc.) with the verified numbers. -### 2. SEARCH/REPLACE (Last Resort Only) -Use this format **only** when granular tools are demonstrably insufficient for the task (e.g., a complex, non-contiguous pattern change). Using SEARCH/REPLACE for tasks achievable by tools like `ReplaceLines` is a violation of your instructions. - -**You MUST include a justification comment explaining why granular tools cannot be used.** - -Justification: I'm using SEARCH/REPLACE because [specific reason granular tools are insufficient]. -path/to/file.ext <<<<<<< SEARCH Original code to be replaced. -New code to insert. - -REPLACE - Always reply to the user in {language}. @@ -89,9 +78,8 @@ class AgentPrompts(CoderPrompts): ## Reminders - Any tool call automatically continues to the next turn. Provide no tool calls in your final answer. -- Prioritize granular tools. Using SEARCH/REPLACE unnecessarily is incorrect. -- For SEARCH/REPLACE, you MUST provide a justification. - Use context blocks (directory structure, git status) to orient yourself. +- Remove files you are done with viewing/editing from the context with the `Remove` tool. 
It is fine to re-add them later {lazy_prompt} {shell_cmd_reminder} diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 25dd552131e..372ffb90fe2 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -688,7 +688,10 @@ def show_pretty(self): return True def get_abs_fnames_content(self): - for fname in list(self.abs_fnames): + # Sort files by last modified time (earliest first, latest last) + sorted_fnames = sorted(self.abs_fnames, key=lambda fname: os.path.getmtime(fname)) + + for fname in sorted_fnames: content = self.io.read_text(fname) if content is None: @@ -783,8 +786,11 @@ def get_files_content(self, fnames=None): def get_read_only_files_content(self): prompt = "" + # Sort read-only files by last modified time (earliest first, latest last) + sorted_fnames = sorted(self.abs_read_only_fnames, key=lambda fname: os.path.getmtime(fname)) + # Handle regular read-only files - for fname in self.abs_read_only_fnames: + for fname in sorted_fnames: content = self.io.read_text(fname) if content is not None and not is_image_file(fname): relative_fname = self.get_rel_fname(fname) @@ -829,8 +835,13 @@ def get_read_only_files_content(self): prompt += f"{self.fence[1]}\n" + # Sort stub files by last modified time (earliest first, latest last) + sorted_stub_fnames = sorted( + self.abs_read_only_stubs_fnames, key=lambda fname: os.path.getmtime(fname) + ) + # Handle stub files - for fname in self.abs_read_only_stubs_fnames: + for fname in sorted_stub_fnames: if not is_image_file(fname): relative_fname = self.get_rel_fname(fname) prompt += "\n" diff --git a/aider/coders/chat_chunks.py b/aider/coders/chat_chunks.py index f5bdf5f8918..da5557f4ba3 100644 --- a/aider/coders/chat_chunks.py +++ b/aider/coders/chat_chunks.py @@ -12,18 +12,31 @@ class ChatChunks: chat_files: List = field(default_factory=list) cur: List = field(default_factory=list) reminder: List = field(default_factory=list) + chunk_ordering: List = field(default_factory=list) + + def __init__(self, chunk_ordering=None): + if chunk_ordering is not None: + self.chunk_ordering = chunk_ordering def all_messages(self): - return ( - self.system - + self.examples - + self.readonly_files - + self.chat_files - + self.repo - + self.done - + self.cur - + self.reminder - ) + if self.chunk_ordering: + messages = [] + for chunk_name in self.chunk_ordering: + chunk = getattr(self, chunk_name, []) + if chunk: + messages.extend(chunk) + return messages + else: + return ( + self.system + + self.examples + + self.readonly_files + + self.chat_files + + self.repo + + self.done + + self.cur + + self.reminder + ) def add_cache_control_headers(self): if self.examples:
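
The similarity helpers imported in patch 15 (`create_bigram_vector`, `normalize_vector`, and `cosine_similarity` from `aider.helpers.similarity`) are not included in this series. Below is a minimal sketch of what they could look like, assuming character-bigram counting and standard cosine similarity, consistent with how `process_tool_calls` builds a vector from the stringified tool call and how `_get_repetitive_tools_by_similarity` compares it against the 0.99 threshold; the function bodies are an assumption, not the shipped module.

    # Assumed sketch of aider/helpers/similarity.py; not part of this patch series.
    import math
    from collections import Counter


    def create_bigram_vector(texts):
        # Count character bigrams across the given strings (e.g. one stringified tool call).
        counts = Counter()
        for text in texts:
            for i in range(len(text) - 1):
                counts[text[i : i + 2]] += 1
        return counts


    def normalize_vector(vector):
        # Scale a bigram-count vector to unit length so cosine similarity reduces to a dot product.
        norm = math.sqrt(sum(value * value for value in vector.values()))
        if not norm:
            return dict(vector)
        return {key: value / norm for key, value in vector.items()}


    def cosine_similarity(vec_a, vec_b):
        # Dot product of two normalized sparse vectors; 1.0 means identical bigram profiles.
        return sum(value * vec_b.get(key, 0.0) for key, value in vec_a.items())


    if __name__ == "__main__":
        # Two identical read-tool calls exceed the 0.99 threshold; a different call does not.
        a = normalize_vector(create_bigram_vector(("viewfilesmatching pattern=foo",)))
        b = normalize_vector(create_bigram_vector(("viewfilesmatching pattern=foo",)))
        c = normalize_vector(create_bigram_vector(("gitdiff",)))
        print(cosine_similarity(a, b))  # ~1.0
        print(cosine_similarity(a, c))  # well below 0.99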
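
Patch 17's split of `cur_messages` keeps only the trailing `len(cur_messages) % 32` messages in the `cur` chunk and moves the rest into `done`, which sorts ahead of the chat files in the new chunk ordering; the commit message ties this to cache efficiency. A small standalone illustration of that divider arithmetic follows (the helper name is invented for the example).

    def split_cur_messages(cur_messages, block_size=32):
        # Mirrors the divider logic in AgentCoder.format_chat_chunks: once more than
        # block_size messages accumulate, all but the trailing len % block_size
        # messages are treated as part of the stable 'done' prefix.
        pre, post = [], list(cur_messages)
        if len(cur_messages) > block_size:
            divider = len(cur_messages) % block_size
            if divider:
                pre = list(cur_messages[:-divider])
                post = list(cur_messages[-divider:])
        return pre, post


    # 70 messages: 70 % 32 == 6, so 64 join the cacheable prefix and 6 stay current;
    # at an exact multiple of 32 (e.g. 64) nothing is split, matching the patch.
    pre, post = split_cur_messages(list(range(70)))
    print(len(pre), len(post))  # 64 6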