From 0cda1c3a3a39b55b104dc216c3667cc8b65215d9 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 26 Nov 2025 14:01:19 -0800
Subject: [PATCH 01/17] fix: Add BadGatewayError to exceptions list for retry.

Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro)
---
 aider/exceptions.py | 229 ++++++++++++++++++++++----------------
 1 file changed, 115 insertions(+), 114 deletions(-)

diff --git a/aider/exceptions.py b/aider/exceptions.py
index 0348df5b4b0..ade8a491c22 100644
--- a/aider/exceptions.py
+++ b/aider/exceptions.py
@@ -1,114 +1,115 @@
-from dataclasses import dataclass
-
-from aider.dump import dump  # noqa: F401
-
-
-@dataclass
-class ExInfo:
-    name: str
-    retry: bool
-    description: str
-
-
-EXCEPTIONS = [
-    ExInfo("APIConnectionError", True, None),
-    ExInfo("APIError", True, None),
-    ExInfo("APIResponseValidationError", True, None),
-    ExInfo(
-        "AuthenticationError",
-        False,
-        "The API provider is not able to authenticate you. Check your API key.",
-    ),
-    ExInfo("AzureOpenAIError", True, None),
-    ExInfo("BadRequestError", False, None),
-    ExInfo("BudgetExceededError", True, None),
-    ExInfo(
-        "ContentPolicyViolationError",
-        True,
-        "The API provider has refused the request due to a safety policy about the content.",
-    ),
-    ExInfo("ContextWindowExceededError", False, None),  # special case handled in base_coder
-    ExInfo("ErrorEventError", True, None),
-    ExInfo("ImageFetchError", True, "The API cannot fetch an image"),
-    ExInfo("InternalServerError", True, "The API provider's servers are down or overloaded."),
-    ExInfo("InvalidRequestError", True, None),
-    ExInfo("JSONSchemaValidationError", True, None),
-    ExInfo("NotFoundError", False, None),
-    ExInfo("OpenAIError", True, None),
-    ExInfo(
-        "RateLimitError",
-        True,
-        "The API provider has rate limited you. Try again later or check your quotas.",
-    ),
-    ExInfo("RouterRateLimitError", True, None),
-    ExInfo("ServiceUnavailableError", True, "The API provider's servers are down or overloaded."),
-    ExInfo("UnprocessableEntityError", True, None),
-    ExInfo("UnsupportedParamsError", True, None),
-    ExInfo(
-        "Timeout",
-        True,
-        "The API provider timed out without returning a response. They may be down or overloaded.",
-    ),
-]
-
-
-class LiteLLMExceptions:
-    exceptions = dict()
-    exception_info = {exi.name: exi for exi in EXCEPTIONS}
-
-    def __init__(self):
-        self._load()
-
-    def _load(self, strict=False):
-        import litellm
-
-        for var in dir(litellm):
-            if var.endswith("Error"):
-                if var not in self.exception_info:
-                    raise ValueError(f"{var} is in litellm but not in aider's exceptions list")
-
-        for var in self.exception_info:
-            ex = getattr(litellm, var, "default")
-
-            if ex != "default":
-                if not issubclass(ex, BaseException):
-                    continue
-
-                self.exceptions[ex] = self.exception_info[var]
-
-    def exceptions_tuple(self):
-        return tuple(self.exceptions)
-
-    def get_ex_info(self, ex):
-        """Return the ExInfo for a given exception instance"""
-        import litellm
-
-        if ex.__class__ is litellm.APIConnectionError:
-            if "google.auth" in str(ex):
-                return ExInfo(
-                    "APIConnectionError", False, "You need to: pip install google-generativeai"
-                )
-            if "boto3" in str(ex):
-                return ExInfo("APIConnectionError", False, "You need to: pip install boto3")
-            if "OpenrouterException" in str(ex) and "'choices'" in str(ex):
-                return ExInfo(
-                    "APIConnectionError",
-                    True,
-                    (
-                        "OpenRouter or the upstream API provider is down, overloaded or rate"
-                        " limiting your requests."
-                    ),
-                )
-
-        # Check for specific non-retryable APIError cases like insufficient credits
-        if ex.__class__ is litellm.APIError:
-            err_str = str(ex).lower()
-            if "insufficient credits" in err_str and '"code":402' in err_str:
-                return ExInfo(
-                    "APIError",
-                    False,
-                    "Insufficient credits with the API provider. Please add credits.",
-                )
-            # Fall through to default APIError handling if not the specific credits error
-
-        return self.exceptions.get(ex.__class__, ExInfo(None, None, None))
+from dataclasses import dataclass
+
+from aider.dump import dump  # noqa: F401
+
+
+@dataclass
+class ExInfo:
+    name: str
+    retry: bool
+    description: str
+
+
+EXCEPTIONS = [
+    ExInfo("APIConnectionError", True, None),
+    ExInfo("APIError", True, None),
+    ExInfo("APIResponseValidationError", True, None),
+    ExInfo(
+        "AuthenticationError",
+        False,
+        "The API provider is not able to authenticate you. Check your API key.",
+    ),
+    ExInfo("AzureOpenAIError", True, None),
+    ExInfo("BadGatewayError", True, None),
+    ExInfo("BadRequestError", False, None),
+    ExInfo("BudgetExceededError", True, None),
+    ExInfo(
+        "ContentPolicyViolationError",
+        True,
+        "The API provider has refused the request due to a safety policy about the content.",
+    ),
+    ExInfo("ContextWindowExceededError", False, None),  # special case handled in base_coder
+    ExInfo("ErrorEventError", True, None),
+    ExInfo("ImageFetchError", True, "The API cannot fetch an image"),
+    ExInfo("InternalServerError", True, "The API provider's servers are down or overloaded."),
+    ExInfo("InvalidRequestError", True, None),
+    ExInfo("JSONSchemaValidationError", True, None),
+    ExInfo("NotFoundError", False, None),
+    ExInfo("OpenAIError", True, None),
+    ExInfo(
+        "RateLimitError",
+        True,
+        "The API provider has rate limited you. Try again later or check your quotas.",
+    ),
+    ExInfo("RouterRateLimitError", True, None),
+    ExInfo("ServiceUnavailableError", True, "The API provider's servers are down or overloaded."),
+    ExInfo("UnprocessableEntityError", True, None),
+    ExInfo("UnsupportedParamsError", True, None),
+    ExInfo(
+        "Timeout",
+        True,
+        "The API provider timed out without returning a response. They may be down or overloaded.",
+    ),
+]
+
+
+class LiteLLMExceptions:
+    exceptions = dict()
+    exception_info = {exi.name: exi for exi in EXCEPTIONS}
+
+    def __init__(self):
+        self._load()
+
+    def _load(self, strict=False):
+        import litellm
+
+        for var in dir(litellm):
+            if var.endswith("Error"):
+                if var not in self.exception_info:
+                    raise ValueError(f"{var} is in litellm but not in aider's exceptions list")
+
+        for var in self.exception_info:
+            ex = getattr(litellm, var, "default")
+
+            if ex != "default":
+                if not issubclass(ex, BaseException):
+                    continue
+
+                self.exceptions[ex] = self.exception_info[var]
+
+    def exceptions_tuple(self):
+        return tuple(self.exceptions)
+
+    def get_ex_info(self, ex):
+        """Return the ExInfo for a given exception instance"""
+        import litellm
+
+        if ex.__class__ is litellm.APIConnectionError:
+            if "google.auth" in str(ex):
+                return ExInfo(
+                    "APIConnectionError", False, "You need to: pip install google-generativeai"
+                )
+            if "boto3" in str(ex):
+                return ExInfo("APIConnectionError", False, "You need to: pip install boto3")
+            if "OpenrouterException" in str(ex) and "'choices'" in str(ex):
+                return ExInfo(
+                    "APIConnectionError",
+                    True,
+                    (
+                        "OpenRouter or the upstream API provider is down, overloaded or rate"
+                        " limiting your requests."
+                    ),
+                )
+
+        # Check for specific non-retryable APIError cases like insufficient credits
+        if ex.__class__ is litellm.APIError:
+            err_str = str(ex).lower()
+            if "insufficient credits" in err_str and '"code":402' in err_str:
+                return ExInfo(
+                    "APIError",
+                    False,
+                    "Insufficient credits with the API provider. Please add credits.",
+                )
+            # Fall through to default APIError handling if not the specific credits error
+
+        return self.exceptions.get(ex.__class__, ExInfo(None, None, None))
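Patch 01 above only flips the retry flag: BadGatewayError (litellm's exception for HTTP 502 responses) now maps to retry=True in aider's exception table. The retry itself happens in the send loop that consults this table. The sketch below is a minimal illustration of that consumption path, modeled on the shape of Model.simple_send_with_retries() in aider/models.py (its pre-patch form appears in patch 03 further down); the function name, model string and delay values are placeholders for illustration, not code from this series.

    # Minimal sketch of how ExInfo.retry is consumed; not part of these patches.
    import time

    import litellm

    from aider.exceptions import LiteLLMExceptions


    def send_with_retries(messages, model="gpt-4o", retry_delay=0.125, max_delay=60):
        litellm_ex = LiteLLMExceptions()
        while True:
            try:
                return litellm.completion(model=model, messages=messages)
            except litellm_ex.exceptions_tuple() as err:
                ex_info = litellm_ex.get_ex_info(err)
                # With patch 01, a 502 (BadGatewayError) lands here with
                # ex_info.retry == True instead of aborting the request.
                if not ex_info.retry or retry_delay > max_delay:
                    raise
                print(f"{err}\nRetrying in {retry_delay:.1f} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2

The test added in patch 02 below can be run on its own with, for example, pytest tests/basic/test_exceptions.py -k bad_gateway, assuming the repository's usual pytest setup.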
From 0a45d42079c59f49b7f9897dc8874437c9ae361a Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 26 Nov 2025 14:02:12 -0800
Subject: [PATCH 02/17] feat: Add BadGatewayError handling and test case

Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro)
---
 tests/basic/test_exceptions.py | 181 ++++++++++++++++++---------
 1 file changed, 97 insertions(+), 84 deletions(-)

diff --git a/tests/basic/test_exceptions.py b/tests/basic/test_exceptions.py
index 5f9c095f8b6..821bf249956 100644
--- a/tests/basic/test_exceptions.py
+++ b/tests/basic/test_exceptions.py
@@ -1,84 +1,97 @@
-from aider.exceptions import ExInfo, LiteLLMExceptions
-
-
-def test_litellm_exceptions_load():
-    """Test that LiteLLMExceptions loads without errors"""
-    ex = LiteLLMExceptions()
-    assert len(ex.exceptions) > 0
-
-
-def test_exceptions_tuple():
-    """Test that exceptions_tuple returns a non-empty tuple"""
-    ex = LiteLLMExceptions()
-    assert isinstance(ex.exceptions_tuple(), tuple)
-    assert len(ex.exceptions_tuple()) > 0
-
-
-def test_get_ex_info():
-    """Test get_ex_info returns correct ExInfo"""
-    ex = LiteLLMExceptions()
-
-    # Test with a known exception type
-    from litellm import AuthenticationError
-
-    auth_error = AuthenticationError(
-        message="Invalid API key", llm_provider="openai", model="gpt-4"
-    )
-    ex_info = ex.get_ex_info(auth_error)
-    assert isinstance(ex_info, ExInfo)
-    assert ex_info.name == "AuthenticationError"
-    assert ex_info.retry is False
-    assert "API key" in ex_info.description
-
-    # Test with unknown exception type
-    class UnknownError(Exception):
-        pass
-
-    unknown = UnknownError()
-    ex_info = ex.get_ex_info(unknown)
-    assert isinstance(ex_info, ExInfo)
-    assert ex_info.name is None
-    assert ex_info.retry is None
-    assert ex_info.description is None
-
-
-def test_rate_limit_error():
-    """Test specific handling of RateLimitError"""
-    ex = LiteLLMExceptions()
-    from litellm import RateLimitError
-
-    rate_error = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-4")
-    ex_info = ex.get_ex_info(rate_error)
-    assert ex_info.retry is True
-    assert "rate limited" in ex_info.description.lower()
-
-
-def test_context_window_error():
-    """Test specific handling of ContextWindowExceededError"""
-    ex = LiteLLMExceptions()
-    from litellm import ContextWindowExceededError
-
-    ctx_error = ContextWindowExceededError(
-        message="Context length exceeded", model="gpt-4", llm_provider="openai"
-    )
-    ex_info = ex.get_ex_info(ctx_error)
-    assert ex_info.retry is False
-
-
-def test_openrouter_error():
-    """Test specific handling of OpenRouter API errors"""
-    ex = LiteLLMExceptions()
-    from litellm import APIConnectionError
-
-    # Create an APIConnectionError with OpenrouterException message
-    openrouter_error = APIConnectionError(
-        message="APIConnectionError: OpenrouterException - 'choices'",
-        model="openrouter/model",
-        llm_provider="openrouter",
-    )
-
-    ex_info = ex.get_ex_info(openrouter_error)
-    assert ex_info.retry is True
-    assert "OpenRouter" in ex_info.description
-    assert "overloaded" in ex_info.description
-    assert "rate" in ex_info.description
+from aider.exceptions import ExInfo, LiteLLMExceptions
+
+
+def test_litellm_exceptions_load():
+    """Test that LiteLLMExceptions loads without errors"""
+    ex = LiteLLMExceptions()
+    assert len(ex.exceptions) > 0
+
+
+def test_exceptions_tuple():
+    """Test that exceptions_tuple returns a non-empty tuple"""
+    ex = LiteLLMExceptions()
+    assert isinstance(ex.exceptions_tuple(), tuple)
+    assert len(ex.exceptions_tuple()) > 0
+
+
+def test_get_ex_info():
+    """Test get_ex_info returns correct ExInfo"""
+    ex = LiteLLMExceptions()
+
+    # Test with a known exception type
+    from litellm import AuthenticationError
+
+    auth_error = AuthenticationError(
+        message="Invalid API key", llm_provider="openai", model="gpt-4"
+    )
+    ex_info = ex.get_ex_info(auth_error)
+    assert isinstance(ex_info, ExInfo)
+    assert ex_info.name == "AuthenticationError"
+    assert ex_info.retry is False
+    assert "API key" in ex_info.description
+
+    # Test with unknown exception type
+    class UnknownError(Exception):
+        pass
+
+    unknown = UnknownError()
+    ex_info = ex.get_ex_info(unknown)
+    assert isinstance(ex_info, ExInfo)
+    assert ex_info.name is None
+    assert ex_info.retry is None
+    assert ex_info.description is None
+
+
+def test_rate_limit_error():
+    """Test specific handling of RateLimitError"""
+    ex = LiteLLMExceptions()
+    from litellm import RateLimitError
+
+    rate_error = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-4")
+    ex_info = ex.get_ex_info(rate_error)
+    assert ex_info.retry is True
+    assert "rate limited" in ex_info.description.lower()
+
+
+def test_bad_gateway_error():
+    """Test specific handling of BadGatewayError"""
+    ex = LiteLLMExceptions()
+    from litellm import BadGatewayError
+
+    bad_gateway_error = BadGatewayError(
+        message="Bad Gateway", llm_provider="openai", model="gpt-4"
+    )
+    ex_info = ex.get_ex_info(bad_gateway_error)
+    assert ex_info.retry is True
+    assert ex_info.name == "BadGatewayError"
+
+
+def test_context_window_error():
+    """Test specific handling of ContextWindowExceededError"""
+    ex = LiteLLMExceptions()
+    from litellm import ContextWindowExceededError
+
+    ctx_error = ContextWindowExceededError(
+        message="Context length exceeded", model="gpt-4", llm_provider="openai"
+    )
+    ex_info = ex.get_ex_info(ctx_error)
+    assert ex_info.retry is False
+
+
+def test_openrouter_error():
+    """Test specific handling of OpenRouter API errors"""
+    ex = LiteLLMExceptions()
+    from litellm import APIConnectionError
+
+    # Create an APIConnectionError with OpenrouterException message
+    openrouter_error = APIConnectionError(
+        message="APIConnectionError: OpenrouterException - 'choices'",
+        model="openrouter/model",
+        llm_provider="openrouter",
+    )
+
+    ex_info = ex.get_ex_info(openrouter_error)
+    assert ex_info.retry is True
+    assert "OpenRouter" in ex_info.description
+    assert "overloaded" in ex_info.description
+    assert "rate" in ex_info.description

From 129bf85f2a047c7250a397b4b824a4c3b680c39c Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 26 Nov 2025 14:07:38 -0800
Subject: [PATCH 03/17] fix: Deduplicate tools before sending to the model to avoid errors.

Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro) --- aider/models.py | 2637 ++++++++++++++++++++++++----------------------- 1 file changed, 1324 insertions(+), 1313 deletions(-) diff --git a/aider/models.py b/aider/models.py index 4c09161d02a..d86cd3c82a0 100644 --- a/aider/models.py +++ b/aider/models.py @@ -1,1313 +1,1324 @@ -import asyncio -import difflib -import hashlib -import importlib.resources -import json -import math -import os -import platform -import sys -import time -from dataclasses import dataclass, fields -from pathlib import Path -from typing import Optional, Union - -import json5 -import yaml -from PIL import Image - -from aider import __version__ -from aider.dump import dump # noqa: F401 -from aider.llm import litellm -from aider.openrouter import OpenRouterModelManager -from aider.sendchat import ensure_alternating_roles, sanity_check_messages -from aider.utils import check_pip_install_extra - -RETRY_TIMEOUT = 60 - -request_timeout = 600 - -DEFAULT_MODEL_NAME = "gpt-4o" -ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25" - -OPENAI_MODELS = """ -o1 -o1-preview -o1-mini -o3-mini -gpt-4 -gpt-4o -gpt-4o-2024-05-13 -gpt-4-turbo-preview -gpt-4-0314 -gpt-4-0613 -gpt-4-32k -gpt-4-32k-0314 -gpt-4-32k-0613 -gpt-4-turbo -gpt-4-turbo-2024-04-09 -gpt-4-1106-preview -gpt-4-0125-preview -gpt-4-vision-preview -gpt-4-1106-vision-preview -gpt-4o-mini -gpt-4o-mini-2024-07-18 -gpt-3.5-turbo -gpt-3.5-turbo-0301 -gpt-3.5-turbo-0613 -gpt-3.5-turbo-1106 -gpt-3.5-turbo-0125 -gpt-3.5-turbo-16k -gpt-3.5-turbo-16k-0613 -""" - -OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()] - -ANTHROPIC_MODELS = """ -claude-2 -claude-2.1 -claude-3-haiku-20240307 -claude-3-5-haiku-20241022 -claude-3-opus-20240229 -claude-3-sonnet-20240229 -claude-3-5-sonnet-20240620 -claude-3-5-sonnet-20241022 -claude-sonnet-4-20250514 -claude-opus-4-20250514 -""" - -ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()] - -# Mapping of model aliases to their canonical names -MODEL_ALIASES = { - # Claude models - "sonnet": "anthropic/claude-sonnet-4-20250514", - "haiku": "claude-3-5-haiku-20241022", - "opus": "claude-opus-4-20250514", - # GPT models - "4": "gpt-4-0613", - "4o": "gpt-4o", - "4-turbo": "gpt-4-1106-preview", - "35turbo": "gpt-3.5-turbo", - "35-turbo": "gpt-3.5-turbo", - "3": "gpt-3.5-turbo", - # Other models - "deepseek": "deepseek/deepseek-chat", - "flash": "gemini/gemini-2.5-flash", - "flash-lite": "gemini/gemini-2.5-flash-lite", - "quasar": "openrouter/openrouter/quasar-alpha", - "r1": "deepseek/deepseek-reasoner", - "gemini-2.5-pro": "gemini/gemini-2.5-pro", - "gemini": "gemini/gemini-2.5-pro", - "gemini-exp": "gemini/gemini-2.5-pro-exp-03-25", - "grok3": "xai/grok-3-beta", - "optimus": "openrouter/openrouter/optimus-alpha", -} -# Model metadata loaded from resources and user's files. 
- - -@dataclass -class ModelSettings: - # Model class needs to have each of these as well - name: str - edit_format: str = "whole" - weak_model_name: Optional[str] = None - use_repo_map: bool = False - send_undo_reply: bool = False - lazy: bool = False - overeager: bool = False - reminder: str = "user" - examples_as_sys_msg: bool = False - extra_params: Optional[dict] = None - cache_control: bool = False - caches_by_default: bool = False - use_system_prompt: bool = True - use_temperature: Union[bool, float] = True - streaming: bool = True - editor_model_name: Optional[str] = None - editor_edit_format: Optional[str] = None - reasoning_tag: Optional[str] = None - remove_reasoning: Optional[str] = None # Deprecated alias for reasoning_tag - system_prompt_prefix: Optional[str] = None - accepts_settings: Optional[list] = None - - -# Load model settings from package resource -MODEL_SETTINGS = [] -with importlib.resources.open_text("aider.resources", "model-settings.yml") as f: - model_settings_list = yaml.safe_load(f) - for model_settings_dict in model_settings_list: - MODEL_SETTINGS.append(ModelSettings(**model_settings_dict)) - - -class ModelInfoManager: - MODEL_INFO_URL = ( - "https://raw.githubusercontent.com/BerriAI/litellm/main/" - "model_prices_and_context_window.json" - ) - CACHE_TTL = 60 * 60 * 24 # 24 hours - - def __init__(self): - self.cache_dir = Path.home() / ".aider" / "caches" - self.cache_file = self.cache_dir / "model_prices_and_context_window.json" - self.content = None - self.local_model_metadata = {} - self.verify_ssl = True - self._cache_loaded = False - - # Manager for the cached OpenRouter model database - self.openrouter_manager = OpenRouterModelManager() - - def set_verify_ssl(self, verify_ssl): - self.verify_ssl = verify_ssl - if hasattr(self, "openrouter_manager"): - self.openrouter_manager.set_verify_ssl(verify_ssl) - - def _load_cache(self): - if self._cache_loaded: - return - - try: - self.cache_dir.mkdir(parents=True, exist_ok=True) - if self.cache_file.exists(): - cache_age = time.time() - self.cache_file.stat().st_mtime - if cache_age < self.CACHE_TTL: - try: - self.content = json.loads(self.cache_file.read_text()) - except json.JSONDecodeError: - # If the cache file is corrupted, treat it as missing - self.content = None - except OSError: - pass - - self._cache_loaded = True - - def _update_cache(self): - try: - import requests - - # Respect the --no-verify-ssl switch - response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl) - if response.status_code == 200: - self.content = response.json() - try: - self.cache_file.write_text(json.dumps(self.content, indent=4)) - except OSError: - pass - except Exception as ex: - print(str(ex)) - try: - # Save empty dict to cache file on failure - self.cache_file.write_text("{}") - except OSError: - pass - - def get_model_from_cached_json_db(self, model): - data = self.local_model_metadata.get(model) - if data: - return data - - # Ensure cache is loaded before checking content - self._load_cache() - - if not self.content: - self._update_cache() - - if not self.content: - return dict() - - info = self.content.get(model, dict()) - if info: - return info - - pieces = model.split("/") - if len(pieces) == 2: - info = self.content.get(pieces[1]) - if info and info.get("litellm_provider") == pieces[0]: - return info - - return dict() - - def get_model_info(self, model): - cached_info = self.get_model_from_cached_json_db(model) - - litellm_info = None - if litellm._lazy_module or not cached_info: - try: - 
litellm_info = litellm.get_model_info(model) - except Exception as ex: - if "model_prices_and_context_window.json" not in str(ex): - print(str(ex)) - - if litellm_info: - return litellm_info - - if not cached_info and model.startswith("openrouter/"): - # First try using the locally cached OpenRouter model database - openrouter_info = self.openrouter_manager.get_model_info(model) - if openrouter_info: - return openrouter_info - - # Fallback to legacy web-scraping if the API cache does not contain the model - openrouter_info = self.fetch_openrouter_model_info(model) - if openrouter_info: - return openrouter_info - - return cached_info - - def fetch_openrouter_model_info(self, model): - """ - Fetch model info by scraping the openrouter model page. - Expected URL: https://openrouter.ai/ - Example: openrouter/qwen/qwen-2.5-72b-instruct:free - Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, - input_cost_per_token, output_cost_per_token. - """ - url_part = model[len("openrouter/") :] - url = "https://openrouter.ai/" + url_part - try: - import requests - - response = requests.get(url, timeout=5, verify=self.verify_ssl) - if response.status_code != 200: - return {} - html = response.text - import re - - if re.search( - rf"The model\s*.*{re.escape(url_part)}.* is not available", html, re.IGNORECASE - ): - print(f"\033[91mError: Model '{url_part}' is not available\033[0m") - return {} - text = re.sub(r"<[^>]+>", " ", html) - context_match = re.search(r"([\d,]+)\s*context", text) - if context_match: - context_str = context_match.group(1).replace(",", "") - context_size = int(context_str) - else: - context_size = None - input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE) - output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE) - input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None - output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None - if context_size is None or input_cost is None or output_cost is None: - return {} - params = { - "max_input_tokens": context_size, - "max_tokens": context_size, - "max_output_tokens": context_size, - "input_cost_per_token": input_cost, - "output_cost_per_token": output_cost, - } - return params - except Exception as e: - print("Error fetching openrouter info:", str(e)) - return {} - - -model_info_manager = ModelInfoManager() - - -class Model(ModelSettings): - def __init__( - self, model, weak_model=None, editor_model=None, editor_edit_format=None, verbose=False - ): - # Map any alias to its canonical name - model = MODEL_ALIASES.get(model, model) - - self.name = model - self.verbose = verbose - - self.max_chat_history_tokens = 1024 - self.weak_model = None - self.editor_model = None - - # Find the extra settings - self.extra_model_settings = next( - (ms for ms in MODEL_SETTINGS if ms.name == "aider/extra_params"), None - ) - - self.info = self.get_model_info(model) - - # Are all needed keys/params available? 
- res = self.validate_environment() - self.missing_keys = res.get("missing_keys") - self.keys_in_environment = res.get("keys_in_environment") - - max_input_tokens = self.info.get("max_input_tokens") or 0 - # Calculate max_chat_history_tokens as 1/16th of max_input_tokens, - # with minimum 1k and maximum 8k - self.max_chat_history_tokens = min(max(max_input_tokens / 16, 1024), 8192) - - self.configure_model_settings(model) - if weak_model is False: - self.weak_model_name = None - else: - self.get_weak_model(weak_model) - - if editor_model is False: - self.editor_model_name = None - else: - self.get_editor_model(editor_model, editor_edit_format) - - def get_model_info(self, model): - return model_info_manager.get_model_info(model) - - def _copy_fields(self, source): - """Helper to copy fields from a ModelSettings instance to self""" - for field in fields(ModelSettings): - val = getattr(source, field.name) - setattr(self, field.name, val) - - # Handle backward compatibility: if remove_reasoning is set but reasoning_tag isn't, - # use remove_reasoning's value for reasoning_tag - if self.reasoning_tag is None and self.remove_reasoning is not None: - self.reasoning_tag = self.remove_reasoning - - def configure_model_settings(self, model): - # Look for exact model match - exact_match = False - for ms in MODEL_SETTINGS: - # direct match, or match "provider/" - if model == ms.name: - self._copy_fields(ms) - exact_match = True - break # Continue to apply overrides - - # Initialize accepts_settings if it's None - if self.accepts_settings is None: - self.accepts_settings = [] - - model = model.lower() - - # If no exact match, try generic settings - if not exact_match: - self.apply_generic_model_settings(model) - - # Apply override settings last if they exist - if ( - self.extra_model_settings - and self.extra_model_settings.extra_params - and self.extra_model_settings.name == "aider/extra_params" - ): - # Initialize extra_params if it doesn't exist - if not self.extra_params: - self.extra_params = {} - - # Deep merge the extra_params dicts - for key, value in self.extra_model_settings.extra_params.items(): - if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): - # For nested dicts, merge recursively - self.extra_params[key] = {**self.extra_params[key], **value} - else: - # For non-dict values, simply update - self.extra_params[key] = value - - # Ensure OpenRouter models accept thinking_tokens and reasoning_effort - if self.name.startswith("openrouter/"): - if self.accepts_settings is None: - self.accepts_settings = [] - if "thinking_tokens" not in self.accepts_settings: - self.accepts_settings.append("thinking_tokens") - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - - def apply_generic_model_settings(self, model): - if "/o3-mini" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.system_prompt_prefix = "Formatting re-enabled. " - self.system_prompt_prefix = "Formatting re-enabled. 
" - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "gpt-4.1-mini" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = False - return # <-- - - if "gpt-4.1" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = False - return # <-- - - last_segment = model.split("/")[-1] - if last_segment in ("gpt-5", "gpt-5-2025-08-07"): - self.use_temperature = False - self.edit_format = "diff" - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "/o1-mini" in model: - self.use_repo_map = True - self.use_temperature = False - self.use_system_prompt = False - return # <-- - - if "/o1-preview" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.use_system_prompt = False - return # <-- - - if "/o1" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.streaming = False - self.system_prompt_prefix = "Formatting re-enabled. " - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "deepseek" in model and "v3" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = True - return # <-- - - if "deepseek" in model and ("r1" in model or "reasoning" in model): - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.use_temperature = False - self.reasoning_tag = "think" - return # <-- - - if ("llama3" in model or "llama-3" in model) and "70b" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.send_undo_reply = True - self.examples_as_sys_msg = True - return # <-- - - if "gpt-4-turbo" in model or ("gpt-4-" in model and "-preview" in model): - self.edit_format = "udiff" - self.use_repo_map = True - self.send_undo_reply = True - return # <-- - - if "gpt-4" in model or "claude-3-opus" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.send_undo_reply = True - return # <-- - - if "gpt-3.5" in model or "gpt-4" in model: - self.reminder = "sys" - return # <-- - - if "3-7-sonnet" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.reminder = "user" - if "thinking_tokens" not in self.accepts_settings: - self.accepts_settings.append("thinking_tokens") - return # <-- - - if "3.5-sonnet" in model or "3-5-sonnet" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.reminder = "user" - return # <-- - - if model.startswith("o1-") or "/o1-" in model: - self.use_system_prompt = False - self.use_temperature = False - return # <-- - - if ( - "qwen" in model - and "coder" in model - and ("2.5" in model or "2-5" in model) - and "32b" in model - ): - self.edit_format = "diff" - self.editor_edit_format = "editor-diff" - self.use_repo_map = True - return # <-- - - if "qwq" in model and "32b" in model and "preview" not in model: - self.edit_format = "diff" - self.editor_edit_format = "editor-diff" - self.use_repo_map = True - self.reasoning_tag = "think" - self.examples_as_sys_msg = True - self.use_temperature = 0.6 - self.extra_params = dict(top_p=0.95) - return # <-- - - if "qwen3" in model: - self.edit_format = "diff" - self.use_repo_map = True - 
if "235b" in model: - self.system_prompt_prefix = "/no_think" - self.use_temperature = 0.7 - self.extra_params = {"top_p": 0.8, "top_k": 20, "min_p": 0.0} - else: - self.examples_as_sys_msg = True - self.use_temperature = 0.6 - self.reasoning_tag = "think" - self.extra_params = {"top_p": 0.95, "top_k": 20, "min_p": 0.0} - return # <-- - - # use the defaults - if self.edit_format == "diff": - self.use_repo_map = True - return # <-- - - def __str__(self): - return self.name - - def get_weak_model(self, provided_weak_model_name): - # If weak_model_name is provided, override the model settings - if provided_weak_model_name: - self.weak_model_name = provided_weak_model_name - - if not self.weak_model_name: - self.weak_model = self - return - - if self.weak_model_name == self.name: - self.weak_model = self - return - - self.weak_model = Model( - self.weak_model_name, - weak_model=False, - ) - return self.weak_model - - def commit_message_models(self): - return [self.weak_model, self] - - def get_editor_model(self, provided_editor_model_name, editor_edit_format): - # If editor_model_name is provided, override the model settings - if provided_editor_model_name: - self.editor_model_name = provided_editor_model_name - if editor_edit_format: - self.editor_edit_format = editor_edit_format - - if not self.editor_model_name or self.editor_model_name == self.name: - self.editor_model = self - else: - self.editor_model = Model( - self.editor_model_name, - editor_model=False, - ) - - if not self.editor_edit_format: - self.editor_edit_format = self.editor_model.edit_format - if self.editor_edit_format in ("diff", "whole", "diff-fenced"): - self.editor_edit_format = "editor-" + self.editor_edit_format - - return self.editor_model - - def tokenizer(self, text): - return litellm.encode(model=self.name, text=text) - - def token_count(self, messages): - if isinstance(messages, dict): - messages = [messages] - - if isinstance(messages, list): - try: - return litellm.token_counter(model=self.name, messages=messages) - except Exception: - pass # fall back to raw tokenizer - - if not self.tokenizer: - return 0 - - if isinstance(messages, str): - msgs = messages - else: - msgs = json.dumps(messages) - - try: - return len(self.tokenizer(msgs)) - except Exception as err: - print(f"Unable to count tokens with tokenizer: {err}") - return 0 - - def token_count_for_image(self, fname): - """ - Calculate the token cost for an image assuming high detail. - The token cost is determined by the size of the image. - :param fname: The filename of the image. - :return: The token cost for the image. - """ - width, height = self.get_image_size(fname) - - # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 - max_dimension = max(width, height) - if max_dimension > 2048: - scale_factor = 2048 / max_dimension - width = int(width * scale_factor) - height = int(height * scale_factor) - - # Scale the image such that the shortest side is 768 pixels long - min_dimension = min(width, height) - scale_factor = 768 / min_dimension - width = int(width * scale_factor) - height = int(height * scale_factor) - - # Calculate the number of 512x512 tiles needed to cover the image - tiles_width = math.ceil(width / 512) - tiles_height = math.ceil(height / 512) - num_tiles = tiles_width * tiles_height - - # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens - token_cost = num_tiles * 170 + 85 - return token_cost - - def get_image_size(self, fname): - """ - Retrieve the size of an image. 
- :param fname: The filename of the image. - :return: A tuple (width, height) representing the image size in pixels. - """ - with Image.open(fname) as img: - return img.size - - def fast_validate_environment(self): - """Fast path for common models. Avoids forcing litellm import.""" - - model = self.name - - pieces = model.split("/") - if len(pieces) > 1: - provider = pieces[0] - else: - provider = None - - keymap = dict( - openrouter="OPENROUTER_API_KEY", - openai="OPENAI_API_KEY", - deepseek="DEEPSEEK_API_KEY", - gemini="GEMINI_API_KEY", - anthropic="ANTHROPIC_API_KEY", - groq="GROQ_API_KEY", - fireworks_ai="FIREWORKS_API_KEY", - ) - var = None - if model in OPENAI_MODELS: - var = "OPENAI_API_KEY" - elif model in ANTHROPIC_MODELS: - var = "ANTHROPIC_API_KEY" - else: - var = keymap.get(provider) - - if var and os.environ.get(var): - return dict(keys_in_environment=[var], missing_keys=[]) - - def validate_environment(self): - res = self.fast_validate_environment() - if res: - return res - - # https://github.com/BerriAI/litellm/issues/3190 - - model = self.name - res = litellm.validate_environment(model) - - # If missing AWS credential keys but AWS_PROFILE is set, consider AWS credentials valid - if res["missing_keys"] and any( - key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] for key in res["missing_keys"] - ): - if model.startswith("bedrock/") or model.startswith("us.anthropic."): - if os.environ.get("AWS_PROFILE"): - res["missing_keys"] = [ - k - for k in res["missing_keys"] - if k not in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] - ] - if not res["missing_keys"]: - res["keys_in_environment"] = True - - if res["keys_in_environment"]: - return res - if res["missing_keys"]: - return res - - provider = self.info.get("litellm_provider", "").lower() - if provider == "cohere_chat": - return validate_variables(["COHERE_API_KEY"]) - if provider == "gemini": - return validate_variables(["GEMINI_API_KEY"]) - if provider == "groq": - return validate_variables(["GROQ_API_KEY"]) - - return res - - def get_repo_map_tokens(self): - map_tokens = 1024 - max_inp_tokens = self.info.get("max_input_tokens") - if max_inp_tokens: - map_tokens = max_inp_tokens / 8 - map_tokens = min(map_tokens, 4096) - map_tokens = max(map_tokens, 1024) - return map_tokens - - def set_reasoning_effort(self, effort): - """Set the reasoning effort parameter for models that support it""" - if effort is not None: - if self.name.startswith("openrouter/"): - if not self.extra_params: - self.extra_params = {} - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - self.extra_params["extra_body"]["reasoning"] = {"effort": effort} - else: - if not self.extra_params: - self.extra_params = {} - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - self.extra_params["extra_body"]["reasoning_effort"] = effort - - def parse_token_value(self, value): - """ - Parse a token value string into an integer. - Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. 
- - Args: - value: String or int token value - - Returns: - Integer token value - """ - if isinstance(value, int): - return value - - if not isinstance(value, str): - return int(value) # Try to convert to int - - value = value.strip().upper() - - if value.endswith("K"): - multiplier = 1024 - value = value[:-1] - elif value.endswith("M"): - multiplier = 1024 * 1024 - value = value[:-1] - else: - multiplier = 1 - - # Convert to float first to handle decimal values like "10.5k" - return int(float(value) * multiplier) - - def set_thinking_tokens(self, value): - """ - Set the thinking token budget for models that support it. - Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. - Pass "0" to disable thinking tokens. - """ - if value is not None: - num_tokens = self.parse_token_value(value) - self.use_temperature = False - if not self.extra_params: - self.extra_params = {} - - # OpenRouter models use 'reasoning' instead of 'thinking' - if self.name.startswith("openrouter/"): - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - if num_tokens > 0: - self.extra_params["extra_body"]["reasoning"] = {"max_tokens": num_tokens} - else: - if "reasoning" in self.extra_params["extra_body"]: - del self.extra_params["extra_body"]["reasoning"] - else: - if num_tokens > 0: - self.extra_params["thinking"] = {"type": "enabled", "budget_tokens": num_tokens} - else: - if "thinking" in self.extra_params: - del self.extra_params["thinking"] - - def get_raw_thinking_tokens(self): - """Get formatted thinking token budget if available""" - budget = None - - if self.extra_params: - # Check for OpenRouter reasoning format - if self.name.startswith("openrouter/"): - if ( - "extra_body" in self.extra_params - and "reasoning" in self.extra_params["extra_body"] - and "max_tokens" in self.extra_params["extra_body"]["reasoning"] - ): - budget = self.extra_params["extra_body"]["reasoning"]["max_tokens"] - # Check for standard thinking format - elif ( - "thinking" in self.extra_params and "budget_tokens" in self.extra_params["thinking"] - ): - budget = self.extra_params["thinking"]["budget_tokens"] - - return budget - - def get_thinking_tokens(self): - budget = self.get_raw_thinking_tokens() - - if budget is not None: - # Format as xx.yK for thousands, xx.yM for millions - if budget >= 1024 * 1024: - value = budget / (1024 * 1024) - if value == int(value): - return f"{int(value)}M" - else: - return f"{value:.1f}M" - else: - value = budget / 1024 - if value == int(value): - return f"{int(value)}k" - else: - return f"{value:.1f}k" - return None - - def get_reasoning_effort(self): - """Get reasoning effort value if available""" - if self.extra_params: - # Check for OpenRouter reasoning format - if self.name.startswith("openrouter/"): - if ( - "extra_body" in self.extra_params - and "reasoning" in self.extra_params["extra_body"] - and "effort" in self.extra_params["extra_body"]["reasoning"] - ): - return self.extra_params["extra_body"]["reasoning"]["effort"] - # Check for standard reasoning_effort format (e.g. 
in extra_body) - elif ( - "extra_body" in self.extra_params - and "reasoning_effort" in self.extra_params["extra_body"] - ): - return self.extra_params["extra_body"]["reasoning_effort"] - return None - - def is_deepseek(self): - name = self.name.lower() - if "deepseek" not in name: - return - return True - - def is_ollama(self): - return self.name.startswith("ollama/") or self.name.startswith("ollama_chat/") - - async def send_completion( - self, messages, functions, stream, temperature=None, tools=None, max_tokens=None - ): - if os.environ.get("AIDER_SANITY_CHECK_TURNS"): - sanity_check_messages(messages) - - messages = ensure_alternating_roles(messages) - - if self.verbose: - for message in messages: - msg_role = message.get("role") - msg_content = message.get("content") if message.get("content") else "" - msg_trunc = "" - - if message.get("content"): - msg_trunc = message.get("content")[:30] - - print(f"{msg_role} ({len(msg_content)}): {msg_trunc}") - - kwargs = dict(model=self.name, stream=stream) - - if self.use_temperature is not False: - if temperature is None: - if isinstance(self.use_temperature, bool): - temperature = 0 - else: - temperature = float(self.use_temperature) - - kwargs["temperature"] = temperature - - # `tools` is for modern tool usage. `functions` is for legacy/forced calls. - # This handles `base_coder` sending both with same content for `navigator_coder`. - effective_tools = tools - - if effective_tools is None and functions: - # Convert legacy `functions` to `tools` format if `tools` isn't provided. - effective_tools = [dict(type="function", function=f) for f in functions] - - if effective_tools: - kwargs["tools"] = effective_tools - - # Forcing a function call is for legacy style `functions` with a single function. - # This is used by ArchitectCoder and not intended for NavigatorCoder's tools. - if functions and len(functions) == 1: - function = functions[0] - - if "name" in function: - tool_name = function.get("name") - if tool_name: - kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_name}} - - if self.extra_params: - kwargs.update(self.extra_params) - - if max_tokens: - kwargs["max_tokens"] = max_tokens - - if "max_tokens" in kwargs and kwargs["max_tokens"]: - kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") - if self.is_ollama() and "num_ctx" not in kwargs: - num_ctx = int(self.token_count(messages) * 1.25) + 8192 - kwargs["num_ctx"] = num_ctx - - key = json.dumps(kwargs, sort_keys=True).encode() - # dump(kwargs) - - hash_object = hashlib.sha1(key) - if "timeout" not in kwargs: - kwargs["timeout"] = request_timeout - if self.verbose: - dump(kwargs) - kwargs["messages"] = messages - - # Are we using github copilot? 
- if "GITHUB_COPILOT_TOKEN" in os.environ or self.name.startswith("github_copilot/"): - if "extra_headers" not in kwargs: - kwargs["extra_headers"] = { - "Editor-Version": f"aider/{__version__}", - "Copilot-Integration-Id": "vscode-chat", - } - - try: - res = await litellm.acompletion(**kwargs) - except Exception as err: - print(f"LiteLLM API Error: {str(err)}") - res = self.model_error_response() - - if self.verbose: - print(f"LiteLLM API Error: {str(err)}") - raise - - return hash_object, res - - async def simple_send_with_retries(self, messages, max_tokens=None): - from aider.exceptions import LiteLLMExceptions - - litellm_ex = LiteLLMExceptions() - if "deepseek-reasoner" in self.name: - messages = ensure_alternating_roles(messages) - retry_delay = 0.125 - - if self.verbose: - dump(messages) - - while True: - try: - _hash, response = await self.send_completion( - messages=messages, - functions=None, - stream=False, - max_tokens=max_tokens, - ) - if not response or not hasattr(response, "choices") or not response.choices: - return None - res = response.choices[0].message.content - from aider.reasoning_tags import remove_reasoning_content - - return remove_reasoning_content(res, self.reasoning_tag) - - except litellm_ex.exceptions_tuple() as err: - ex_info = litellm_ex.get_ex_info(err) - print(str(err)) - if ex_info.description: - print(ex_info.description) - should_retry = ex_info.retry - if should_retry: - retry_delay *= 2 - if retry_delay > RETRY_TIMEOUT: - should_retry = False - if not should_retry: - return None - print(f"Retrying in {retry_delay:.1f} seconds...") - time.sleep(retry_delay) - continue - except AttributeError: - return None - - async def model_error_response(self): - for i in range(1): - await asyncio.sleep(0.1) - yield litellm.ModelResponse( - choices=[ - litellm.Choices( - finish_reason="stop", - index=0, - message=litellm.Message( - content="Model API Response Error. Please retry the previous request" - ), # Provide an empty message object - ) - ], - model=self.name, - ) - - -def register_models(model_settings_fnames): - files_loaded = [] - for model_settings_fname in model_settings_fnames: - if not os.path.exists(model_settings_fname): - continue - - if not Path(model_settings_fname).read_text().strip(): - continue - - try: - with open(model_settings_fname, "r") as model_settings_file: - model_settings_list = yaml.safe_load(model_settings_file) - - for model_settings_dict in model_settings_list: - model_settings = ModelSettings(**model_settings_dict) - - # Remove all existing settings for this model name - MODEL_SETTINGS[:] = [ms for ms in MODEL_SETTINGS if ms.name != model_settings.name] - # Add the new settings - MODEL_SETTINGS.append(model_settings) - except Exception as e: - raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") - files_loaded.append(model_settings_fname) - - return files_loaded - - -def register_litellm_models(model_fnames): - files_loaded = [] - for model_fname in model_fnames: - if not os.path.exists(model_fname): - continue - - try: - data = Path(model_fname).read_text() - if not data.strip(): - continue - model_def = json5.loads(data) - if not model_def: - continue - - # Defer registration with litellm to faster path. 
- model_info_manager.local_model_metadata.update(model_def) - except Exception as e: - raise Exception(f"Error loading model definition from {model_fname}: {e}") - - files_loaded.append(model_fname) - - return files_loaded - - -def validate_variables(vars): - missing = [] - for var in vars: - if var not in os.environ: - missing.append(var) - if missing: - return dict(keys_in_environment=False, missing_keys=missing) - return dict(keys_in_environment=True, missing_keys=missing) - - -def sanity_check_models(io, main_model): - problem_main = sanity_check_model(io, main_model) - - problem_weak = None - if main_model.weak_model and main_model.weak_model is not main_model: - problem_weak = sanity_check_model(io, main_model.weak_model) - - problem_editor = None - if ( - main_model.editor_model - and main_model.editor_model is not main_model - and main_model.editor_model is not main_model.weak_model - ): - problem_editor = sanity_check_model(io, main_model.editor_model) - - return problem_main or problem_weak or problem_editor - - -def sanity_check_model(io, model): - show = False - - if model.missing_keys: - show = True - io.tool_warning(f"Warning: {model} expects these environment variables") - for key in model.missing_keys: - value = os.environ.get(key, "") - status = "Set" if value else "Not set" - io.tool_output(f"- {key}: {status}") - - if platform.system() == "Windows": - io.tool_output( - "Note: You may need to restart your terminal or command prompt for `setx` to take" - " effect." - ) - - elif not model.keys_in_environment: - show = True - io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") - - # Check for model-specific dependencies - check_for_dependencies(io, model.name) - - if not model.info: - show = True - io.tool_warning( - f"Warning for {model}: Unknown context window size and costs, using sane defaults." - ) - - possible_matches = fuzzy_match_models(model.name) - if possible_matches: - io.tool_output("Did you mean one of these?") - for match in possible_matches: - io.tool_output(f"- {match}") - - return show - - -def check_for_dependencies(io, model_name): - """ - Check for model-specific dependencies and install them if needed. 
- - Args: - io: The IO object for user interaction - model_name: The name of the model to check dependencies for - """ - # Check if this is a Bedrock model and ensure boto3 is installed - if model_name.startswith("bedrock/"): - check_pip_install_extra( - io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] - ) - - # Check if this is a Vertex AI model and ensure google-cloud-aiplatform is installed - elif model_name.startswith("vertex_ai/"): - check_pip_install_extra( - io, - "google.cloud.aiplatform", - "Google Vertex AI models require the google-cloud-aiplatform package.", - ["google-cloud-aiplatform"], - ) - - -def fuzzy_match_models(name): - name = name.lower() - - chat_models = set() - model_metadata = list(litellm.model_cost.items()) - model_metadata += list(model_info_manager.local_model_metadata.items()) - - for orig_model, attrs in model_metadata: - model = orig_model.lower() - if attrs.get("mode") != "chat": - continue - provider = attrs.get("litellm_provider", "").lower() - if not provider: - continue - provider += "/" - - if model.startswith(provider): - fq_model = orig_model - else: - fq_model = provider + orig_model - - chat_models.add(fq_model) - chat_models.add(orig_model) - - chat_models = sorted(chat_models) - # exactly matching model - # matching_models = [ - # (fq,m) for fq,m in chat_models - # if name == fq or name == m - # ] - # if matching_models: - # return matching_models - - # Check for model names containing the name - matching_models = [m for m in chat_models if name in m] - if matching_models: - return sorted(set(matching_models)) - - # Check for slight misspellings - models = set(chat_models) - matching_models = difflib.get_close_matches(name, models, n=3, cutoff=0.8) - - return sorted(set(matching_models)) - - -def print_matching_models(io, search): - matches = fuzzy_match_models(search) - if matches: - io.tool_output(f'Models which match "{search}":') - for model in matches: - io.tool_output(f"- {model}") - else: - io.tool_output(f'No models match "{search}".') - - -def get_model_settings_as_yaml(): - from dataclasses import fields - - import yaml - - model_settings_list = [] - # Add default settings first with all field values - defaults = {} - for field in fields(ModelSettings): - defaults[field.name] = field.default - defaults["name"] = "(default values)" - model_settings_list.append(defaults) - - # Sort model settings by name - for ms in sorted(MODEL_SETTINGS, key=lambda x: x.name): - # Create dict with explicit field order - model_settings_dict = {} - for field in fields(ModelSettings): - value = getattr(ms, field.name) - if value != field.default: - model_settings_dict[field.name] = value - model_settings_list.append(model_settings_dict) - # Add blank line between entries - model_settings_list.append(None) - - # Filter out None values before dumping - yaml_str = yaml.dump( - [ms for ms in model_settings_list if ms is not None], - default_flow_style=False, - sort_keys=False, # Preserve field order from dataclass - ) - # Add actual blank lines between entries - return yaml_str.replace("\n- ", "\n\n- ") - - -def main(): - if len(sys.argv) < 2: - print("Usage: python models.py or python models.py --yaml") - sys.exit(1) - - if sys.argv[1] == "--yaml": - yaml_string = get_model_settings_as_yaml() - print(yaml_string) - else: - model_name = sys.argv[1] - matching_models = fuzzy_match_models(model_name) - - if matching_models: - print(f"Matching models for '{model_name}':") - for model in matching_models: - print(model) - else: - print(f"No 
matching models found for '{model_name}'.") - - -if __name__ == "__main__": - main() +import asyncio +import difflib +import hashlib +import importlib.resources +import json +import math +import os +import platform +import sys +import time +from dataclasses import dataclass, fields +from pathlib import Path +from typing import Optional, Union + +import json5 +import yaml +from PIL import Image + +from aider import __version__ +from aider.dump import dump # noqa: F401 +from aider.llm import litellm +from aider.openrouter import OpenRouterModelManager +from aider.sendchat import ensure_alternating_roles, sanity_check_messages +from aider.utils import check_pip_install_extra + +RETRY_TIMEOUT = 60 + +request_timeout = 600 + +DEFAULT_MODEL_NAME = "gpt-4o" +ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25" + +OPENAI_MODELS = """ +o1 +o1-preview +o1-mini +o3-mini +gpt-4 +gpt-4o +gpt-4o-2024-05-13 +gpt-4-turbo-preview +gpt-4-0314 +gpt-4-0613 +gpt-4-32k +gpt-4-32k-0314 +gpt-4-32k-0613 +gpt-4-turbo +gpt-4-turbo-2024-04-09 +gpt-4-1106-preview +gpt-4-0125-preview +gpt-4-vision-preview +gpt-4-1106-vision-preview +gpt-4o-mini +gpt-4o-mini-2024-07-18 +gpt-3.5-turbo +gpt-3.5-turbo-0301 +gpt-3.5-turbo-0613 +gpt-3.5-turbo-1106 +gpt-3.5-turbo-0125 +gpt-3.5-turbo-16k +gpt-3.5-turbo-16k-0613 +""" + +OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()] + +ANTHROPIC_MODELS = """ +claude-2 +claude-2.1 +claude-3-haiku-20240307 +claude-3-5-haiku-20241022 +claude-3-opus-20240229 +claude-3-sonnet-20240229 +claude-3-5-sonnet-20240620 +claude-3-5-sonnet-20241022 +claude-sonnet-4-20250514 +claude-opus-4-20250514 +""" + +ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()] + +# Mapping of model aliases to their canonical names +MODEL_ALIASES = { + # Claude models + "sonnet": "anthropic/claude-sonnet-4-20250514", + "haiku": "claude-3-5-haiku-20241022", + "opus": "claude-opus-4-20250514", + # GPT models + "4": "gpt-4-0613", + "4o": "gpt-4o", + "4-turbo": "gpt-4-1106-preview", + "35turbo": "gpt-3.5-turbo", + "35-turbo": "gpt-3.5-turbo", + "3": "gpt-3.5-turbo", + # Other models + "deepseek": "deepseek/deepseek-chat", + "flash": "gemini/gemini-2.5-flash", + "flash-lite": "gemini/gemini-2.5-flash-lite", + "quasar": "openrouter/openrouter/quasar-alpha", + "r1": "deepseek/deepseek-reasoner", + "gemini-2.5-pro": "gemini/gemini-2.5-pro", + "gemini": "gemini/gemini-2.5-pro", + "gemini-exp": "gemini/gemini-2.5-pro-exp-03-25", + "grok3": "xai/grok-3-beta", + "optimus": "openrouter/openrouter/optimus-alpha", +} +# Model metadata loaded from resources and user's files. 
+ + +@dataclass +class ModelSettings: + # Model class needs to have each of these as well + name: str + edit_format: str = "whole" + weak_model_name: Optional[str] = None + use_repo_map: bool = False + send_undo_reply: bool = False + lazy: bool = False + overeager: bool = False + reminder: str = "user" + examples_as_sys_msg: bool = False + extra_params: Optional[dict] = None + cache_control: bool = False + caches_by_default: bool = False + use_system_prompt: bool = True + use_temperature: Union[bool, float] = True + streaming: bool = True + editor_model_name: Optional[str] = None + editor_edit_format: Optional[str] = None + reasoning_tag: Optional[str] = None + remove_reasoning: Optional[str] = None # Deprecated alias for reasoning_tag + system_prompt_prefix: Optional[str] = None + accepts_settings: Optional[list] = None + + +# Load model settings from package resource +MODEL_SETTINGS = [] +with importlib.resources.open_text("aider.resources", "model-settings.yml") as f: + model_settings_list = yaml.safe_load(f) + for model_settings_dict in model_settings_list: + MODEL_SETTINGS.append(ModelSettings(**model_settings_dict)) + + +class ModelInfoManager: + MODEL_INFO_URL = ( + "https://raw.githubusercontent.com/BerriAI/litellm/main/" + "model_prices_and_context_window.json" + ) + CACHE_TTL = 60 * 60 * 24 # 24 hours + + def __init__(self): + self.cache_dir = Path.home() / ".aider" / "caches" + self.cache_file = self.cache_dir / "model_prices_and_context_window.json" + self.content = None + self.local_model_metadata = {} + self.verify_ssl = True + self._cache_loaded = False + + # Manager for the cached OpenRouter model database + self.openrouter_manager = OpenRouterModelManager() + + def set_verify_ssl(self, verify_ssl): + self.verify_ssl = verify_ssl + if hasattr(self, "openrouter_manager"): + self.openrouter_manager.set_verify_ssl(verify_ssl) + + def _load_cache(self): + if self._cache_loaded: + return + + try: + self.cache_dir.mkdir(parents=True, exist_ok=True) + if self.cache_file.exists(): + cache_age = time.time() - self.cache_file.stat().st_mtime + if cache_age < self.CACHE_TTL: + try: + self.content = json.loads(self.cache_file.read_text()) + except json.JSONDecodeError: + # If the cache file is corrupted, treat it as missing + self.content = None + except OSError: + pass + + self._cache_loaded = True + + def _update_cache(self): + try: + import requests + + # Respect the --no-verify-ssl switch + response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl) + if response.status_code == 200: + self.content = response.json() + try: + self.cache_file.write_text(json.dumps(self.content, indent=4)) + except OSError: + pass + except Exception as ex: + print(str(ex)) + try: + # Save empty dict to cache file on failure + self.cache_file.write_text("{}") + except OSError: + pass + + def get_model_from_cached_json_db(self, model): + data = self.local_model_metadata.get(model) + if data: + return data + + # Ensure cache is loaded before checking content + self._load_cache() + + if not self.content: + self._update_cache() + + if not self.content: + return dict() + + info = self.content.get(model, dict()) + if info: + return info + + pieces = model.split("/") + if len(pieces) == 2: + info = self.content.get(pieces[1]) + if info and info.get("litellm_provider") == pieces[0]: + return info + + return dict() + + def get_model_info(self, model): + cached_info = self.get_model_from_cached_json_db(model) + + litellm_info = None + if litellm._lazy_module or not cached_info: + try: + 
litellm_info = litellm.get_model_info(model) + except Exception as ex: + if "model_prices_and_context_window.json" not in str(ex): + print(str(ex)) + + if litellm_info: + return litellm_info + + if not cached_info and model.startswith("openrouter/"): + # First try using the locally cached OpenRouter model database + openrouter_info = self.openrouter_manager.get_model_info(model) + if openrouter_info: + return openrouter_info + + # Fallback to legacy web-scraping if the API cache does not contain the model + openrouter_info = self.fetch_openrouter_model_info(model) + if openrouter_info: + return openrouter_info + + return cached_info + + def fetch_openrouter_model_info(self, model): + """ + Fetch model info by scraping the openrouter model page. + Expected URL: https://openrouter.ai/ + Example: openrouter/qwen/qwen-2.5-72b-instruct:free + Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, + input_cost_per_token, output_cost_per_token. + """ + url_part = model[len("openrouter/") :] + url = "https://openrouter.ai/" + url_part + try: + import requests + + response = requests.get(url, timeout=5, verify=self.verify_ssl) + if response.status_code != 200: + return {} + html = response.text + import re + + if re.search( + rf"The model\s*.*{re.escape(url_part)}.* is not available", html, re.IGNORECASE + ): + print(f"\033[91mError: Model '{url_part}' is not available\033[0m") + return {} + text = re.sub(r"<[^>]+>", " ", html) + context_match = re.search(r"([\d,]+)\s*context", text) + if context_match: + context_str = context_match.group(1).replace(",", "") + context_size = int(context_str) + else: + context_size = None + input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE) + output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE) + input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None + output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None + if context_size is None or input_cost is None or output_cost is None: + return {} + params = { + "max_input_tokens": context_size, + "max_tokens": context_size, + "max_output_tokens": context_size, + "input_cost_per_token": input_cost, + "output_cost_per_token": output_cost, + } + return params + except Exception as e: + print("Error fetching openrouter info:", str(e)) + return {} + + +model_info_manager = ModelInfoManager() + + +class Model(ModelSettings): + def __init__( + self, model, weak_model=None, editor_model=None, editor_edit_format=None, verbose=False + ): + # Map any alias to its canonical name + model = MODEL_ALIASES.get(model, model) + + self.name = model + self.verbose = verbose + + self.max_chat_history_tokens = 1024 + self.weak_model = None + self.editor_model = None + + # Find the extra settings + self.extra_model_settings = next( + (ms for ms in MODEL_SETTINGS if ms.name == "aider/extra_params"), None + ) + + self.info = self.get_model_info(model) + + # Are all needed keys/params available? 
+ res = self.validate_environment() + self.missing_keys = res.get("missing_keys") + self.keys_in_environment = res.get("keys_in_environment") + + max_input_tokens = self.info.get("max_input_tokens") or 0 + # Calculate max_chat_history_tokens as 1/16th of max_input_tokens, + # with minimum 1k and maximum 8k + self.max_chat_history_tokens = min(max(max_input_tokens / 16, 1024), 8192) + + self.configure_model_settings(model) + if weak_model is False: + self.weak_model_name = None + else: + self.get_weak_model(weak_model) + + if editor_model is False: + self.editor_model_name = None + else: + self.get_editor_model(editor_model, editor_edit_format) + + def get_model_info(self, model): + return model_info_manager.get_model_info(model) + + def _copy_fields(self, source): + """Helper to copy fields from a ModelSettings instance to self""" + for field in fields(ModelSettings): + val = getattr(source, field.name) + setattr(self, field.name, val) + + # Handle backward compatibility: if remove_reasoning is set but reasoning_tag isn't, + # use remove_reasoning's value for reasoning_tag + if self.reasoning_tag is None and self.remove_reasoning is not None: + self.reasoning_tag = self.remove_reasoning + + def configure_model_settings(self, model): + # Look for exact model match + exact_match = False + for ms in MODEL_SETTINGS: + # direct match, or match "provider/" + if model == ms.name: + self._copy_fields(ms) + exact_match = True + break # Continue to apply overrides + + # Initialize accepts_settings if it's None + if self.accepts_settings is None: + self.accepts_settings = [] + + model = model.lower() + + # If no exact match, try generic settings + if not exact_match: + self.apply_generic_model_settings(model) + + # Apply override settings last if they exist + if ( + self.extra_model_settings + and self.extra_model_settings.extra_params + and self.extra_model_settings.name == "aider/extra_params" + ): + # Initialize extra_params if it doesn't exist + if not self.extra_params: + self.extra_params = {} + + # Deep merge the extra_params dicts + for key, value in self.extra_model_settings.extra_params.items(): + if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): + # For nested dicts, merge recursively + self.extra_params[key] = {**self.extra_params[key], **value} + else: + # For non-dict values, simply update + self.extra_params[key] = value + + # Ensure OpenRouter models accept thinking_tokens and reasoning_effort + if self.name.startswith("openrouter/"): + if self.accepts_settings is None: + self.accepts_settings = [] + if "thinking_tokens" not in self.accepts_settings: + self.accepts_settings.append("thinking_tokens") + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + + def apply_generic_model_settings(self, model): + if "/o3-mini" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.system_prompt_prefix = "Formatting re-enabled. " + self.system_prompt_prefix = "Formatting re-enabled. 
" + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "gpt-4.1-mini" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = False + return # <-- + + if "gpt-4.1" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = False + return # <-- + + last_segment = model.split("/")[-1] + if last_segment in ("gpt-5", "gpt-5-2025-08-07"): + self.use_temperature = False + self.edit_format = "diff" + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "/o1-mini" in model: + self.use_repo_map = True + self.use_temperature = False + self.use_system_prompt = False + return # <-- + + if "/o1-preview" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.use_system_prompt = False + return # <-- + + if "/o1" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.streaming = False + self.system_prompt_prefix = "Formatting re-enabled. " + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "deepseek" in model and "v3" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = True + return # <-- + + if "deepseek" in model and ("r1" in model or "reasoning" in model): + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.use_temperature = False + self.reasoning_tag = "think" + return # <-- + + if ("llama3" in model or "llama-3" in model) and "70b" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.send_undo_reply = True + self.examples_as_sys_msg = True + return # <-- + + if "gpt-4-turbo" in model or ("gpt-4-" in model and "-preview" in model): + self.edit_format = "udiff" + self.use_repo_map = True + self.send_undo_reply = True + return # <-- + + if "gpt-4" in model or "claude-3-opus" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.send_undo_reply = True + return # <-- + + if "gpt-3.5" in model or "gpt-4" in model: + self.reminder = "sys" + return # <-- + + if "3-7-sonnet" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.reminder = "user" + if "thinking_tokens" not in self.accepts_settings: + self.accepts_settings.append("thinking_tokens") + return # <-- + + if "3.5-sonnet" in model or "3-5-sonnet" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.reminder = "user" + return # <-- + + if model.startswith("o1-") or "/o1-" in model: + self.use_system_prompt = False + self.use_temperature = False + return # <-- + + if ( + "qwen" in model + and "coder" in model + and ("2.5" in model or "2-5" in model) + and "32b" in model + ): + self.edit_format = "diff" + self.editor_edit_format = "editor-diff" + self.use_repo_map = True + return # <-- + + if "qwq" in model and "32b" in model and "preview" not in model: + self.edit_format = "diff" + self.editor_edit_format = "editor-diff" + self.use_repo_map = True + self.reasoning_tag = "think" + self.examples_as_sys_msg = True + self.use_temperature = 0.6 + self.extra_params = dict(top_p=0.95) + return # <-- + + if "qwen3" in model: + self.edit_format = "diff" + self.use_repo_map = True + 
if "235b" in model: + self.system_prompt_prefix = "/no_think" + self.use_temperature = 0.7 + self.extra_params = {"top_p": 0.8, "top_k": 20, "min_p": 0.0} + else: + self.examples_as_sys_msg = True + self.use_temperature = 0.6 + self.reasoning_tag = "think" + self.extra_params = {"top_p": 0.95, "top_k": 20, "min_p": 0.0} + return # <-- + + # use the defaults + if self.edit_format == "diff": + self.use_repo_map = True + return # <-- + + def __str__(self): + return self.name + + def get_weak_model(self, provided_weak_model_name): + # If weak_model_name is provided, override the model settings + if provided_weak_model_name: + self.weak_model_name = provided_weak_model_name + + if not self.weak_model_name: + self.weak_model = self + return + + if self.weak_model_name == self.name: + self.weak_model = self + return + + self.weak_model = Model( + self.weak_model_name, + weak_model=False, + ) + return self.weak_model + + def commit_message_models(self): + return [self.weak_model, self] + + def get_editor_model(self, provided_editor_model_name, editor_edit_format): + # If editor_model_name is provided, override the model settings + if provided_editor_model_name: + self.editor_model_name = provided_editor_model_name + if editor_edit_format: + self.editor_edit_format = editor_edit_format + + if not self.editor_model_name or self.editor_model_name == self.name: + self.editor_model = self + else: + self.editor_model = Model( + self.editor_model_name, + editor_model=False, + ) + + if not self.editor_edit_format: + self.editor_edit_format = self.editor_model.edit_format + if self.editor_edit_format in ("diff", "whole", "diff-fenced"): + self.editor_edit_format = "editor-" + self.editor_edit_format + + return self.editor_model + + def tokenizer(self, text): + return litellm.encode(model=self.name, text=text) + + def token_count(self, messages): + if isinstance(messages, dict): + messages = [messages] + + if isinstance(messages, list): + try: + return litellm.token_counter(model=self.name, messages=messages) + except Exception: + pass # fall back to raw tokenizer + + if not self.tokenizer: + return 0 + + if isinstance(messages, str): + msgs = messages + else: + msgs = json.dumps(messages) + + try: + return len(self.tokenizer(msgs)) + except Exception as err: + print(f"Unable to count tokens with tokenizer: {err}") + return 0 + + def token_count_for_image(self, fname): + """ + Calculate the token cost for an image assuming high detail. + The token cost is determined by the size of the image. + :param fname: The filename of the image. + :return: The token cost for the image. + """ + width, height = self.get_image_size(fname) + + # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 + max_dimension = max(width, height) + if max_dimension > 2048: + scale_factor = 2048 / max_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Scale the image such that the shortest side is 768 pixels long + min_dimension = min(width, height) + scale_factor = 768 / min_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Calculate the number of 512x512 tiles needed to cover the image + tiles_width = math.ceil(width / 512) + tiles_height = math.ceil(height / 512) + num_tiles = tiles_width * tiles_height + + # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens + token_cost = num_tiles * 170 + 85 + return token_cost + + def get_image_size(self, fname): + """ + Retrieve the size of an image. 
+ :param fname: The filename of the image. + :return: A tuple (width, height) representing the image size in pixels. + """ + with Image.open(fname) as img: + return img.size + + def fast_validate_environment(self): + """Fast path for common models. Avoids forcing litellm import.""" + + model = self.name + + pieces = model.split("/") + if len(pieces) > 1: + provider = pieces[0] + else: + provider = None + + keymap = dict( + openrouter="OPENROUTER_API_KEY", + openai="OPENAI_API_KEY", + deepseek="DEEPSEEK_API_KEY", + gemini="GEMINI_API_KEY", + anthropic="ANTHROPIC_API_KEY", + groq="GROQ_API_KEY", + fireworks_ai="FIREWORKS_API_KEY", + ) + var = None + if model in OPENAI_MODELS: + var = "OPENAI_API_KEY" + elif model in ANTHROPIC_MODELS: + var = "ANTHROPIC_API_KEY" + else: + var = keymap.get(provider) + + if var and os.environ.get(var): + return dict(keys_in_environment=[var], missing_keys=[]) + + def validate_environment(self): + res = self.fast_validate_environment() + if res: + return res + + # https://github.com/BerriAI/litellm/issues/3190 + + model = self.name + res = litellm.validate_environment(model) + + # If missing AWS credential keys but AWS_PROFILE is set, consider AWS credentials valid + if res["missing_keys"] and any( + key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] for key in res["missing_keys"] + ): + if model.startswith("bedrock/") or model.startswith("us.anthropic."): + if os.environ.get("AWS_PROFILE"): + res["missing_keys"] = [ + k + for k in res["missing_keys"] + if k not in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] + ] + if not res["missing_keys"]: + res["keys_in_environment"] = True + + if res["keys_in_environment"]: + return res + if res["missing_keys"]: + return res + + provider = self.info.get("litellm_provider", "").lower() + if provider == "cohere_chat": + return validate_variables(["COHERE_API_KEY"]) + if provider == "gemini": + return validate_variables(["GEMINI_API_KEY"]) + if provider == "groq": + return validate_variables(["GROQ_API_KEY"]) + + return res + + def get_repo_map_tokens(self): + map_tokens = 1024 + max_inp_tokens = self.info.get("max_input_tokens") + if max_inp_tokens: + map_tokens = max_inp_tokens / 8 + map_tokens = min(map_tokens, 4096) + map_tokens = max(map_tokens, 1024) + return map_tokens + + def set_reasoning_effort(self, effort): + """Set the reasoning effort parameter for models that support it""" + if effort is not None: + if self.name.startswith("openrouter/"): + if not self.extra_params: + self.extra_params = {} + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + self.extra_params["extra_body"]["reasoning"] = {"effort": effort} + else: + if not self.extra_params: + self.extra_params = {} + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + self.extra_params["extra_body"]["reasoning_effort"] = effort + + def parse_token_value(self, value): + """ + Parse a token value string into an integer. + Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. 
+ + Args: + value: String or int token value + + Returns: + Integer token value + """ + if isinstance(value, int): + return value + + if not isinstance(value, str): + return int(value) # Try to convert to int + + value = value.strip().upper() + + if value.endswith("K"): + multiplier = 1024 + value = value[:-1] + elif value.endswith("M"): + multiplier = 1024 * 1024 + value = value[:-1] + else: + multiplier = 1 + + # Convert to float first to handle decimal values like "10.5k" + return int(float(value) * multiplier) + + def set_thinking_tokens(self, value): + """ + Set the thinking token budget for models that support it. + Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. + Pass "0" to disable thinking tokens. + """ + if value is not None: + num_tokens = self.parse_token_value(value) + self.use_temperature = False + if not self.extra_params: + self.extra_params = {} + + # OpenRouter models use 'reasoning' instead of 'thinking' + if self.name.startswith("openrouter/"): + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + if num_tokens > 0: + self.extra_params["extra_body"]["reasoning"] = {"max_tokens": num_tokens} + else: + if "reasoning" in self.extra_params["extra_body"]: + del self.extra_params["extra_body"]["reasoning"] + else: + if num_tokens > 0: + self.extra_params["thinking"] = {"type": "enabled", "budget_tokens": num_tokens} + else: + if "thinking" in self.extra_params: + del self.extra_params["thinking"] + + def get_raw_thinking_tokens(self): + """Get formatted thinking token budget if available""" + budget = None + + if self.extra_params: + # Check for OpenRouter reasoning format + if self.name.startswith("openrouter/"): + if ( + "extra_body" in self.extra_params + and "reasoning" in self.extra_params["extra_body"] + and "max_tokens" in self.extra_params["extra_body"]["reasoning"] + ): + budget = self.extra_params["extra_body"]["reasoning"]["max_tokens"] + # Check for standard thinking format + elif ( + "thinking" in self.extra_params and "budget_tokens" in self.extra_params["thinking"] + ): + budget = self.extra_params["thinking"]["budget_tokens"] + + return budget + + def get_thinking_tokens(self): + budget = self.get_raw_thinking_tokens() + + if budget is not None: + # Format as xx.yK for thousands, xx.yM for millions + if budget >= 1024 * 1024: + value = budget / (1024 * 1024) + if value == int(value): + return f"{int(value)}M" + else: + return f"{value:.1f}M" + else: + value = budget / 1024 + if value == int(value): + return f"{int(value)}k" + else: + return f"{value:.1f}k" + return None + + def get_reasoning_effort(self): + """Get reasoning effort value if available""" + if self.extra_params: + # Check for OpenRouter reasoning format + if self.name.startswith("openrouter/"): + if ( + "extra_body" in self.extra_params + and "reasoning" in self.extra_params["extra_body"] + and "effort" in self.extra_params["extra_body"]["reasoning"] + ): + return self.extra_params["extra_body"]["reasoning"]["effort"] + # Check for standard reasoning_effort format (e.g. 
in extra_body) + elif ( + "extra_body" in self.extra_params + and "reasoning_effort" in self.extra_params["extra_body"] + ): + return self.extra_params["extra_body"]["reasoning_effort"] + return None + + def is_deepseek(self): + name = self.name.lower() + if "deepseek" not in name: + return + return True + + def is_ollama(self): + return self.name.startswith("ollama/") or self.name.startswith("ollama_chat/") + + async def send_completion( + self, messages, functions, stream, temperature=None, tools=None, max_tokens=None + ): + if os.environ.get("AIDER_SANITY_CHECK_TURNS"): + sanity_check_messages(messages) + + messages = ensure_alternating_roles(messages) + + if self.verbose: + for message in messages: + msg_role = message.get("role") + msg_content = message.get("content") if message.get("content") else "" + msg_trunc = "" + + if message.get("content"): + msg_trunc = message.get("content")[:30] + + print(f"{msg_role} ({len(msg_content)}): {msg_trunc}") + + kwargs = dict(model=self.name, stream=stream) + + if self.use_temperature is not False: + if temperature is None: + if isinstance(self.use_temperature, bool): + temperature = 0 + else: + temperature = float(self.use_temperature) + + kwargs["temperature"] = temperature + + # `tools` is for modern tool usage. `functions` is for legacy/forced calls. + # This handles `base_coder` sending both with same content for `navigator_coder`. + effective_tools = [] + if tools: + effective_tools.extend(tools) + + if functions: + # Convert legacy `functions` to `tools` format and add them + effective_tools.extend([dict(type="function", function=f) for f in functions]) + + if effective_tools: + # Deduplicate tools based on function name + seen_tool_names = set() + deduped_tools = [] + for tool in effective_tools: + tool_name = tool.get("function", {}).get("name") + if tool_name and tool_name not in seen_tool_names: + deduped_tools.append(tool) + seen_tool_names.add(tool_name) + effective_tools = deduped_tools + kwargs["tools"] = effective_tools + + # Forcing a function call is for legacy style `functions` with a single function. + # This is used by ArchitectCoder and not intended for NavigatorCoder's tools. + if functions and len(functions) == 1: + function = functions[0] + + if "name" in function: + tool_name = function.get("name") + if tool_name: + kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_name}} + + if self.extra_params: + kwargs.update(self.extra_params) + + if max_tokens: + kwargs["max_tokens"] = max_tokens + + if "max_tokens" in kwargs and kwargs["max_tokens"]: + kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") + if self.is_ollama() and "num_ctx" not in kwargs: + num_ctx = int(self.token_count(messages) * 1.25) + 8192 + kwargs["num_ctx"] = num_ctx + + key = json.dumps(kwargs, sort_keys=True).encode() + # dump(kwargs) + + hash_object = hashlib.sha1(key) + if "timeout" not in kwargs: + kwargs["timeout"] = request_timeout + if self.verbose: + dump(kwargs) + kwargs["messages"] = messages + + # Are we using github copilot? 
+ if "GITHUB_COPILOT_TOKEN" in os.environ or self.name.startswith("github_copilot/"): + if "extra_headers" not in kwargs: + kwargs["extra_headers"] = { + "Editor-Version": f"aider/{__version__}", + "Copilot-Integration-Id": "vscode-chat", + } + + try: + res = await litellm.acompletion(**kwargs) + except Exception as err: + print(f"LiteLLM API Error: {str(err)}") + res = self.model_error_response() + + if self.verbose: + print(f"LiteLLM API Error: {str(err)}") + raise + + return hash_object, res + + async def simple_send_with_retries(self, messages, max_tokens=None): + from aider.exceptions import LiteLLMExceptions + + litellm_ex = LiteLLMExceptions() + if "deepseek-reasoner" in self.name: + messages = ensure_alternating_roles(messages) + retry_delay = 0.125 + + if self.verbose: + dump(messages) + + while True: + try: + _hash, response = await self.send_completion( + messages=messages, + functions=None, + stream=False, + max_tokens=max_tokens, + ) + if not response or not hasattr(response, "choices") or not response.choices: + return None + res = response.choices[0].message.content + from aider.reasoning_tags import remove_reasoning_content + + return remove_reasoning_content(res, self.reasoning_tag) + + except litellm_ex.exceptions_tuple() as err: + ex_info = litellm_ex.get_ex_info(err) + print(str(err)) + if ex_info.description: + print(ex_info.description) + should_retry = ex_info.retry + if should_retry: + retry_delay *= 2 + if retry_delay > RETRY_TIMEOUT: + should_retry = False + if not should_retry: + return None + print(f"Retrying in {retry_delay:.1f} seconds...") + time.sleep(retry_delay) + continue + except AttributeError: + return None + + async def model_error_response(self): + for i in range(1): + await asyncio.sleep(0.1) + yield litellm.ModelResponse( + choices=[ + litellm.Choices( + finish_reason="stop", + index=0, + message=litellm.Message( + content="Model API Response Error. Please retry the previous request" + ), # Provide an empty message object + ) + ], + model=self.name, + ) + + +def register_models(model_settings_fnames): + files_loaded = [] + for model_settings_fname in model_settings_fnames: + if not os.path.exists(model_settings_fname): + continue + + if not Path(model_settings_fname).read_text().strip(): + continue + + try: + with open(model_settings_fname, "r") as model_settings_file: + model_settings_list = yaml.safe_load(model_settings_file) + + for model_settings_dict in model_settings_list: + model_settings = ModelSettings(**model_settings_dict) + + # Remove all existing settings for this model name + MODEL_SETTINGS[:] = [ms for ms in MODEL_SETTINGS if ms.name != model_settings.name] + # Add the new settings + MODEL_SETTINGS.append(model_settings) + except Exception as e: + raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") + files_loaded.append(model_settings_fname) + + return files_loaded + + +def register_litellm_models(model_fnames): + files_loaded = [] + for model_fname in model_fnames: + if not os.path.exists(model_fname): + continue + + try: + data = Path(model_fname).read_text() + if not data.strip(): + continue + model_def = json5.loads(data) + if not model_def: + continue + + # Defer registration with litellm to faster path. 
+ model_info_manager.local_model_metadata.update(model_def) + except Exception as e: + raise Exception(f"Error loading model definition from {model_fname}: {e}") + + files_loaded.append(model_fname) + + return files_loaded + + +def validate_variables(vars): + missing = [] + for var in vars: + if var not in os.environ: + missing.append(var) + if missing: + return dict(keys_in_environment=False, missing_keys=missing) + return dict(keys_in_environment=True, missing_keys=missing) + + +def sanity_check_models(io, main_model): + problem_main = sanity_check_model(io, main_model) + + problem_weak = None + if main_model.weak_model and main_model.weak_model is not main_model: + problem_weak = sanity_check_model(io, main_model.weak_model) + + problem_editor = None + if ( + main_model.editor_model + and main_model.editor_model is not main_model + and main_model.editor_model is not main_model.weak_model + ): + problem_editor = sanity_check_model(io, main_model.editor_model) + + return problem_main or problem_weak or problem_editor + + +def sanity_check_model(io, model): + show = False + + if model.missing_keys: + show = True + io.tool_warning(f"Warning: {model} expects these environment variables") + for key in model.missing_keys: + value = os.environ.get(key, "") + status = "Set" if value else "Not set" + io.tool_output(f"- {key}: {status}") + + if platform.system() == "Windows": + io.tool_output( + "Note: You may need to restart your terminal or command prompt for `setx` to take" + " effect." + ) + + elif not model.keys_in_environment: + show = True + io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") + + # Check for model-specific dependencies + check_for_dependencies(io, model.name) + + if not model.info: + show = True + io.tool_warning( + f"Warning for {model}: Unknown context window size and costs, using sane defaults." + ) + + possible_matches = fuzzy_match_models(model.name) + if possible_matches: + io.tool_output("Did you mean one of these?") + for match in possible_matches: + io.tool_output(f"- {match}") + + return show + + +def check_for_dependencies(io, model_name): + """ + Check for model-specific dependencies and install them if needed. 
+ + Args: + io: The IO object for user interaction + model_name: The name of the model to check dependencies for + """ + # Check if this is a Bedrock model and ensure boto3 is installed + if model_name.startswith("bedrock/"): + check_pip_install_extra( + io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] + ) + + # Check if this is a Vertex AI model and ensure google-cloud-aiplatform is installed + elif model_name.startswith("vertex_ai/"): + check_pip_install_extra( + io, + "google.cloud.aiplatform", + "Google Vertex AI models require the google-cloud-aiplatform package.", + ["google-cloud-aiplatform"], + ) + + +def fuzzy_match_models(name): + name = name.lower() + + chat_models = set() + model_metadata = list(litellm.model_cost.items()) + model_metadata += list(model_info_manager.local_model_metadata.items()) + + for orig_model, attrs in model_metadata: + model = orig_model.lower() + if attrs.get("mode") != "chat": + continue + provider = attrs.get("litellm_provider", "").lower() + if not provider: + continue + provider += "/" + + if model.startswith(provider): + fq_model = orig_model + else: + fq_model = provider + orig_model + + chat_models.add(fq_model) + chat_models.add(orig_model) + + chat_models = sorted(chat_models) + # exactly matching model + # matching_models = [ + # (fq,m) for fq,m in chat_models + # if name == fq or name == m + # ] + # if matching_models: + # return matching_models + + # Check for model names containing the name + matching_models = [m for m in chat_models if name in m] + if matching_models: + return sorted(set(matching_models)) + + # Check for slight misspellings + models = set(chat_models) + matching_models = difflib.get_close_matches(name, models, n=3, cutoff=0.8) + + return sorted(set(matching_models)) + + +def print_matching_models(io, search): + matches = fuzzy_match_models(search) + if matches: + io.tool_output(f'Models which match "{search}":') + for model in matches: + io.tool_output(f"- {model}") + else: + io.tool_output(f'No models match "{search}".') + + +def get_model_settings_as_yaml(): + from dataclasses import fields + + import yaml + + model_settings_list = [] + # Add default settings first with all field values + defaults = {} + for field in fields(ModelSettings): + defaults[field.name] = field.default + defaults["name"] = "(default values)" + model_settings_list.append(defaults) + + # Sort model settings by name + for ms in sorted(MODEL_SETTINGS, key=lambda x: x.name): + # Create dict with explicit field order + model_settings_dict = {} + for field in fields(ModelSettings): + value = getattr(ms, field.name) + if value != field.default: + model_settings_dict[field.name] = value + model_settings_list.append(model_settings_dict) + # Add blank line between entries + model_settings_list.append(None) + + # Filter out None values before dumping + yaml_str = yaml.dump( + [ms for ms in model_settings_list if ms is not None], + default_flow_style=False, + sort_keys=False, # Preserve field order from dataclass + ) + # Add actual blank lines between entries + return yaml_str.replace("\n- ", "\n\n- ") + + +def main(): + if len(sys.argv) < 2: + print("Usage: python models.py or python models.py --yaml") + sys.exit(1) + + if sys.argv[1] == "--yaml": + yaml_string = get_model_settings_as_yaml() + print(yaml_string) + else: + model_name = sys.argv[1] + matching_models = fuzzy_match_models(model_name) + + if matching_models: + print(f"Matching models for '{model_name}':") + for model in matching_models: + print(model) + else: + print(f"No 
matching models found for '{model_name}'.") + + +if __name__ == "__main__": + main() From feb5e7a6dc45509379599deb71e46197415ba019 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 14:08:47 -0800 Subject: [PATCH 04/17] feat: Deduplicate tool declarations and add a test for it. Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro) --- tests/basic/test_models.py | 1252 +++++++++++++++++++----------------- 1 file changed, 648 insertions(+), 604 deletions(-) diff --git a/tests/basic/test_models.py b/tests/basic/test_models.py index 11e42b807af..145323e40cc 100644 --- a/tests/basic/test_models.py +++ b/tests/basic/test_models.py @@ -1,604 +1,648 @@ -import unittest -from unittest.mock import ANY, MagicMock, patch - -from aider.models import ( - ANTHROPIC_BETA_HEADER, - Model, - ModelInfoManager, - register_models, - sanity_check_model, - sanity_check_models, -) - - -class TestModels(unittest.TestCase): - def setUp(self): - """Reset MODEL_SETTINGS before each test""" - from aider.models import MODEL_SETTINGS - - self._original_settings = MODEL_SETTINGS.copy() - - def tearDown(self): - """Restore original MODEL_SETTINGS after each test""" - from aider.models import MODEL_SETTINGS - - MODEL_SETTINGS.clear() - MODEL_SETTINGS.extend(self._original_settings) - - def test_get_model_info_nonexistent(self): - manager = ModelInfoManager() - info = manager.get_model_info("non-existent-model") - self.assertEqual(info, {}) - - def test_max_context_tokens(self): - model = Model("gpt-3.5-turbo") - self.assertEqual(model.info["max_input_tokens"], 16385) - - model = Model("gpt-3.5-turbo-16k") - self.assertEqual(model.info["max_input_tokens"], 16385) - - model = Model("gpt-3.5-turbo-1106") - self.assertEqual(model.info["max_input_tokens"], 16385) - - model = Model("gpt-4") - self.assertEqual(model.info["max_input_tokens"], 8 * 1024) - - model = Model("gpt-4-32k") - self.assertEqual(model.info["max_input_tokens"], 32 * 1024) - - model = Model("gpt-4-0613") - self.assertEqual(model.info["max_input_tokens"], 8 * 1024) - - @patch("os.environ") - def test_sanity_check_model_all_set(self, mock_environ): - mock_environ.get.return_value = "dummy_value" - mock_io = MagicMock() - model = MagicMock() - model.name = "test-model" - model.missing_keys = ["API_KEY1", "API_KEY2"] - model.keys_in_environment = True - model.info = {"some": "info"} - - sanity_check_model(mock_io, model) - - mock_io.tool_output.assert_called() - calls = mock_io.tool_output.call_args_list - self.assertIn("- API_KEY1: Set", str(calls)) - self.assertIn("- API_KEY2: Set", str(calls)) - - @patch("os.environ") - def test_sanity_check_model_not_set(self, mock_environ): - mock_environ.get.return_value = "" - mock_io = MagicMock() - model = MagicMock() - model.name = "test-model" - model.missing_keys = ["API_KEY1", "API_KEY2"] - model.keys_in_environment = True - model.info = {"some": "info"} - - sanity_check_model(mock_io, model) - - mock_io.tool_output.assert_called() - calls = mock_io.tool_output.call_args_list - self.assertIn("- API_KEY1: Not set", str(calls)) - self.assertIn("- API_KEY2: Not set", str(calls)) - - def test_sanity_check_models_bogus_editor(self): - mock_io = MagicMock() - main_model = Model("gpt-4") - main_model.editor_model = Model("bogus-model") - - result = sanity_check_models(mock_io, main_model) - - self.assertTrue( - result - ) # Should return True because there's a problem with the editor model - mock_io.tool_warning.assert_called_with(ANY) # Ensure a warning was issued - - warning_messages = [ - 
warning_call.args[0] for warning_call in mock_io.tool_warning.call_args_list - ] - print("Warning messages:", warning_messages) # Add this line - - self.assertGreaterEqual(mock_io.tool_warning.call_count, 1) # Expect two warnings - self.assertTrue( - any("bogus-model" in msg for msg in warning_messages) - ) # Check that one of the warnings mentions the bogus model - - @patch("aider.models.check_for_dependencies") - def test_sanity_check_model_calls_check_dependencies(self, mock_check_deps): - """Test that sanity_check_model calls check_for_dependencies""" - mock_io = MagicMock() - model = MagicMock() - model.name = "test-model" - model.missing_keys = [] - model.keys_in_environment = True - model.info = {"some": "info"} - - sanity_check_model(mock_io, model) - - # Verify check_for_dependencies was called with the model name - mock_check_deps.assert_called_once_with(mock_io, "test-model") - - def test_model_aliases(self): - # Test common aliases - model = Model("4") - self.assertEqual(model.name, "gpt-4-0613") - - model = Model("4o") - self.assertEqual(model.name, "gpt-4o") - - model = Model("35turbo") - self.assertEqual(model.name, "gpt-3.5-turbo") - - model = Model("35-turbo") - self.assertEqual(model.name, "gpt-3.5-turbo") - - model = Model("3") - self.assertEqual(model.name, "gpt-3.5-turbo") - - model = Model("sonnet") - self.assertEqual(model.name, "anthropic/claude-sonnet-4-20250514") - - model = Model("haiku") - self.assertEqual(model.name, "claude-3-5-haiku-20241022") - - model = Model("opus") - self.assertEqual(model.name, "claude-opus-4-20250514") - - # Test non-alias passes through unchanged - model = Model("gpt-4") - self.assertEqual(model.name, "gpt-4") - - def test_o1_use_temp_false(self): - # Test GitHub Copilot models - model = Model("github/o1-mini") - self.assertEqual(model.name, "github/o1-mini") - self.assertEqual(model.use_temperature, False) - - model = Model("github/o1-preview") - self.assertEqual(model.name, "github/o1-preview") - self.assertEqual(model.use_temperature, False) - - def test_parse_token_value(self): - # Create a model instance to test the parse_token_value method - model = Model("gpt-4") - - # Test integer inputs - self.assertEqual(model.parse_token_value(8096), 8096) - self.assertEqual(model.parse_token_value(1000), 1000) - - # Test string inputs - self.assertEqual(model.parse_token_value("8096"), 8096) - - # Test k/K suffix (kilobytes) - self.assertEqual(model.parse_token_value("8k"), 8 * 1024) - self.assertEqual(model.parse_token_value("8K"), 8 * 1024) - self.assertEqual(model.parse_token_value("10.5k"), 10.5 * 1024) - self.assertEqual(model.parse_token_value("0.5K"), 0.5 * 1024) - - # Test m/M suffix (megabytes) - self.assertEqual(model.parse_token_value("1m"), 1 * 1024 * 1024) - self.assertEqual(model.parse_token_value("1M"), 1 * 1024 * 1024) - self.assertEqual(model.parse_token_value("0.5M"), 0.5 * 1024 * 1024) - - # Test with spaces - self.assertEqual(model.parse_token_value(" 8k "), 8 * 1024) - - # Test conversion from other types - self.assertEqual(model.parse_token_value(8.0), 8) - - def test_set_thinking_tokens(self): - # Test that set_thinking_tokens correctly sets the tokens with different formats - model = Model("gpt-4") - - # Test with integer - model.set_thinking_tokens(8096) - self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 8096) - self.assertFalse(model.use_temperature) - - # Test with string - model.set_thinking_tokens("10k") - self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 10 * 1024) - - # Test 
with decimal value - model.set_thinking_tokens("0.5M") - self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 0.5 * 1024 * 1024) - - @patch("aider.models.check_pip_install_extra") - def test_check_for_dependencies_bedrock(self, mock_check_pip): - """Test that check_for_dependencies calls check_pip_install_extra for Bedrock models""" - from aider.io import InputOutput - - io = InputOutput() - - # Test with a Bedrock model - from aider.models import check_for_dependencies - - check_for_dependencies(io, "bedrock/anthropic.claude-3-sonnet-20240229-v1:0") - - # Verify check_pip_install_extra was called with correct arguments - mock_check_pip.assert_called_once_with( - io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] - ) - - @patch("aider.models.check_pip_install_extra") - def test_check_for_dependencies_vertex_ai(self, mock_check_pip): - """Test that check_for_dependencies calls check_pip_install_extra for Vertex AI models""" - from aider.io import InputOutput - - io = InputOutput() - - # Test with a Vertex AI model - from aider.models import check_for_dependencies - - check_for_dependencies(io, "vertex_ai/gemini-1.5-pro") - - # Verify check_pip_install_extra was called with correct arguments - mock_check_pip.assert_called_once_with( - io, - "google.cloud.aiplatform", - "Google Vertex AI models require the google-cloud-aiplatform package.", - ["google-cloud-aiplatform"], - ) - - @patch("aider.models.check_pip_install_extra") - def test_check_for_dependencies_other_model(self, mock_check_pip): - """Test that check_for_dependencies doesn't call check_pip_install_extra for other models""" - from aider.io import InputOutput - - io = InputOutput() - - # Test with a non-Bedrock, non-Vertex AI model - from aider.models import check_for_dependencies - - check_for_dependencies(io, "gpt-4") - - # Verify check_pip_install_extra was not called - mock_check_pip.assert_not_called() - - def test_get_repo_map_tokens(self): - # Test default case (no max_input_tokens in info) - model = Model("gpt-4") - model.info = {} - self.assertEqual(model.get_repo_map_tokens(), 1024) - - # Test minimum boundary (max_input_tokens < 8192) - model.info = {"max_input_tokens": 4096} - self.assertEqual(model.get_repo_map_tokens(), 1024) - - # Test middle range (max_input_tokens = 16384) - model.info = {"max_input_tokens": 16384} - self.assertEqual(model.get_repo_map_tokens(), 2048) - - # Test maximum boundary (max_input_tokens > 32768) - model.info = {"max_input_tokens": 65536} - self.assertEqual(model.get_repo_map_tokens(), 4096) - - # Test exact boundary values - model.info = {"max_input_tokens": 8192} - self.assertEqual(model.get_repo_map_tokens(), 1024) - - model.info = {"max_input_tokens": 32768} - self.assertEqual(model.get_repo_map_tokens(), 4096) - - def test_configure_model_settings(self): - # Test o3-mini case - model = Model("something/o3-mini") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertFalse(model.use_temperature) - - # Test o1-mini case - model = Model("something/o1-mini") - self.assertTrue(model.use_repo_map) - self.assertFalse(model.use_temperature) - self.assertFalse(model.use_system_prompt) - - # Test o1-preview case - model = Model("something/o1-preview") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertFalse(model.use_temperature) - self.assertFalse(model.use_system_prompt) - - # Test o1 case - model = Model("something/o1") - self.assertEqual(model.edit_format, "diff") - 
self.assertTrue(model.use_repo_map) - self.assertFalse(model.use_temperature) - self.assertFalse(model.streaming) - - # Test deepseek v3 case - model = Model("deepseek-v3") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertEqual(model.reminder, "sys") - self.assertTrue(model.examples_as_sys_msg) - - # Test deepseek reasoner case - model = Model("deepseek-r1") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.examples_as_sys_msg) - self.assertFalse(model.use_temperature) - self.assertEqual(model.reasoning_tag, "think") - - # Test provider/deepseek-r1 case - model = Model("someprovider/deepseek-r1") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.examples_as_sys_msg) - self.assertFalse(model.use_temperature) - self.assertEqual(model.reasoning_tag, "think") - - # Test provider/deepseek-v3 case - model = Model("anotherprovider/deepseek-v3") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertEqual(model.reminder, "sys") - self.assertTrue(model.examples_as_sys_msg) - - # Test llama3 70b case - model = Model("llama3-70b") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.send_undo_reply) - self.assertTrue(model.examples_as_sys_msg) - - # Test gpt-4 case - model = Model("gpt-4") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.send_undo_reply) - - # Test gpt-3.5 case - model = Model("gpt-3.5") - self.assertEqual(model.reminder, "sys") - - # Test 3.5-sonnet case - model = Model("claude-3.5-sonnet") - self.assertEqual(model.edit_format, "diff") - self.assertTrue(model.use_repo_map) - self.assertTrue(model.examples_as_sys_msg) - self.assertEqual(model.reminder, "user") - - # Test o1- prefix case - model = Model("o1-something") - self.assertFalse(model.use_system_prompt) - self.assertFalse(model.use_temperature) - - # Test qwen case - model = Model("qwen-coder-2.5-32b") - self.assertEqual(model.edit_format, "diff") - self.assertEqual(model.editor_edit_format, "editor-diff") - self.assertTrue(model.use_repo_map) - - def test_aider_extra_model_settings(self): - import tempfile - - import yaml - - # Create temporary YAML file with test settings - test_settings = [ - { - "name": "aider/extra_params", - "extra_params": { - "extra_headers": {"Foo": "bar"}, - "some_param": "some value", - }, - }, - ] - - # Write to a regular file instead of NamedTemporaryFile - # for better cross-platform compatibility - tmp = tempfile.mktemp(suffix=".yml") - try: - with open(tmp, "w") as f: - yaml.dump(test_settings, f) - - # Register the test settings - register_models([tmp]) - - # Test that defaults are applied when no exact match - model = Model("claude-3-5-sonnet-20240620") - # Test that both the override and existing headers are present - model = Model("claude-3-5-sonnet-20240620") - self.assertEqual(model.extra_params["extra_headers"]["Foo"], "bar") - self.assertEqual( - model.extra_params["extra_headers"]["anthropic-beta"], - ANTHROPIC_BETA_HEADER, - ) - self.assertEqual(model.extra_params["some_param"], "some value") - self.assertEqual(model.extra_params["max_tokens"], 8192) - - # Test that exact match overrides defaults but not overrides - model = Model("gpt-4") - self.assertEqual(model.extra_params["extra_headers"]["Foo"], "bar") - self.assertEqual(model.extra_params["some_param"], "some 
value") - finally: - # Clean up the temporary file - import os - - try: - os.unlink(tmp) - except OSError: - pass - - @patch("aider.models.litellm.acompletion") - @patch.object(Model, "token_count") - async def test_ollama_num_ctx_set_when_missing(self, mock_token_count, mock_completion): - mock_token_count.return_value = 1000 - - model = Model("ollama/llama3") - model.extra_params = {} - messages = [{"role": "user", "content": "Hello"}] - - await model.send_completion(messages, functions=None, stream=False) - - # Verify num_ctx was calculated and added to call - expected_ctx = int(1000 * 1.25) + 8192 # 9442 - mock_completion.assert_called_once_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - num_ctx=expected_ctx, - timeout=600, - ) - - @patch("aider.models.litellm.acompletion") - async def test_modern_tool_call_propagation(self, mock_completion): - # Test modern tool calling (used for MCP Server Tool Calls) - model = Model("gpt-4") - messages = [{"role": "user", "content": "Hello"}] - - await model.send_completion( - messages, functions=None, stream=False, tools=[dict(type="function", function="test")] - ) - - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - tools=[dict(type="function", function="test")], - temperature=0, - timeout=600, - ) - - @patch("aider.models.litellm.acompletion") - async def test_legacy_tool_call_propagation(self, mock_completion): - # Test modern tool calling (used for legacy server tool calling) - model = Model("gpt-4") - messages = [{"role": "user", "content": "Hello"}] - - await model.send_completion(messages, functions=["test"], stream=False) - - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - tools=[dict(type="function", function="test")], - temperature=0, - timeout=600, - ) - - @patch("aider.models.litellm.acompletion") - async def test_ollama_uses_existing_num_ctx(self, mock_completion): - model = Model("ollama/llama3") - model.extra_params = {"num_ctx": 4096} - - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - - # Should use provided num_ctx from extra_params - mock_completion.assert_called_once_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - num_ctx=4096, - timeout=600, - ) - - @patch("aider.models.litellm.acompletion") - async def test_non_ollama_no_num_ctx(self, mock_completion): - model = Model("gpt-4") - model.extra_params = {} - messages = [{"role": "user", "content": "Hello"}] - - await model.send_completion(messages, functions=None, stream=False) - - # Regular models shouldn't get num_ctx - mock_completion.assert_called_once_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - timeout=600, - ) - self.assertNotIn("num_ctx", mock_completion.call_args.kwargs) - - def test_use_temperature_settings(self): - # Test use_temperature=True (default) uses temperature=0 - model = Model("gpt-4") - self.assertTrue(model.use_temperature) - self.assertEqual(model.use_temperature, True) - - # Test use_temperature=False doesn't pass temperature - model = Model("github/o1-mini") - self.assertFalse(model.use_temperature) - - # Test use_temperature as float value - model = Model("gpt-4") - model.use_temperature = 0.7 - self.assertEqual(model.use_temperature, 0.7) - - @patch("aider.models.litellm.acompletion") - async def test_request_timeout_default(self, mock_completion): - # Test default timeout is used when 
not specified in extra_params - model = Model("gpt-4") - model.extra_params = {} - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - timeout=600, # Default timeout - ) - - @patch("aider.models.litellm.acompletion") - async def test_request_timeout_from_extra_params(self, mock_completion): - # Test timeout from extra_params overrides default - model = Model("gpt-4") - model.extra_params = {"timeout": 300} # 5 minutes - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - timeout=300, # From extra_params - ) - - @patch("aider.models.litellm.acompletion") - async def test_use_temperature_in_send_completion(self, mock_completion): - # Test use_temperature=True sends temperature=0 - model = Model("gpt-4") - model.extra_params = {} - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - temperature=0, - timeout=600, - ) - - # Test use_temperature=False doesn't send temperature - model = Model("github/o1-mini") - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - self.assertNotIn("temperature", mock_completion.call_args.kwargs) - - # Test use_temperature as float sends that value - model = Model("gpt-4") - model.extra_params = {} - model.use_temperature = 0.7 - messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=None, stream=False) - mock_completion.assert_called_with( - model=model.name, - messages=messages, - stream=False, - temperature=0.7, - timeout=600, - ) - - -if __name__ == "__main__": - unittest.main() +import unittest +from unittest.mock import ANY, MagicMock, patch + +from aider.models import ( + ANTHROPIC_BETA_HEADER, + Model, + ModelInfoManager, + register_models, + sanity_check_model, + sanity_check_models, +) + + +class TestModels(unittest.TestCase): + def setUp(self): + """Reset MODEL_SETTINGS before each test""" + from aider.models import MODEL_SETTINGS + + self._original_settings = MODEL_SETTINGS.copy() + + def tearDown(self): + """Restore original MODEL_SETTINGS after each test""" + from aider.models import MODEL_SETTINGS + + MODEL_SETTINGS.clear() + MODEL_SETTINGS.extend(self._original_settings) + + def test_get_model_info_nonexistent(self): + manager = ModelInfoManager() + info = manager.get_model_info("non-existent-model") + self.assertEqual(info, {}) + + def test_max_context_tokens(self): + model = Model("gpt-3.5-turbo") + self.assertEqual(model.info["max_input_tokens"], 16385) + + model = Model("gpt-3.5-turbo-16k") + self.assertEqual(model.info["max_input_tokens"], 16385) + + model = Model("gpt-3.5-turbo-1106") + self.assertEqual(model.info["max_input_tokens"], 16385) + + model = Model("gpt-4") + self.assertEqual(model.info["max_input_tokens"], 8 * 1024) + + model = Model("gpt-4-32k") + self.assertEqual(model.info["max_input_tokens"], 32 * 1024) + + model = Model("gpt-4-0613") + self.assertEqual(model.info["max_input_tokens"], 8 * 1024) + + @patch("os.environ") + def test_sanity_check_model_all_set(self, mock_environ): + 
mock_environ.get.return_value = "dummy_value" + mock_io = MagicMock() + model = MagicMock() + model.name = "test-model" + model.missing_keys = ["API_KEY1", "API_KEY2"] + model.keys_in_environment = True + model.info = {"some": "info"} + + sanity_check_model(mock_io, model) + + mock_io.tool_output.assert_called() + calls = mock_io.tool_output.call_args_list + self.assertIn("- API_KEY1: Set", str(calls)) + self.assertIn("- API_KEY2: Set", str(calls)) + + @patch("os.environ") + def test_sanity_check_model_not_set(self, mock_environ): + mock_environ.get.return_value = "" + mock_io = MagicMock() + model = MagicMock() + model.name = "test-model" + model.missing_keys = ["API_KEY1", "API_KEY2"] + model.keys_in_environment = True + model.info = {"some": "info"} + + sanity_check_model(mock_io, model) + + mock_io.tool_output.assert_called() + calls = mock_io.tool_output.call_args_list + self.assertIn("- API_KEY1: Not set", str(calls)) + self.assertIn("- API_KEY2: Not set", str(calls)) + + def test_sanity_check_models_bogus_editor(self): + mock_io = MagicMock() + main_model = Model("gpt-4") + main_model.editor_model = Model("bogus-model") + + result = sanity_check_models(mock_io, main_model) + + self.assertTrue( + result + ) # Should return True because there's a problem with the editor model + mock_io.tool_warning.assert_called_with(ANY) # Ensure a warning was issued + + warning_messages = [ + warning_call.args[0] for warning_call in mock_io.tool_warning.call_args_list + ] + print("Warning messages:", warning_messages) # Add this line + + self.assertGreaterEqual(mock_io.tool_warning.call_count, 1) # Expect two warnings + self.assertTrue( + any("bogus-model" in msg for msg in warning_messages) + ) # Check that one of the warnings mentions the bogus model + + @patch("aider.models.check_for_dependencies") + def test_sanity_check_model_calls_check_dependencies(self, mock_check_deps): + """Test that sanity_check_model calls check_for_dependencies""" + mock_io = MagicMock() + model = MagicMock() + model.name = "test-model" + model.missing_keys = [] + model.keys_in_environment = True + model.info = {"some": "info"} + + sanity_check_model(mock_io, model) + + # Verify check_for_dependencies was called with the model name + mock_check_deps.assert_called_once_with(mock_io, "test-model") + + def test_model_aliases(self): + # Test common aliases + model = Model("4") + self.assertEqual(model.name, "gpt-4-0613") + + model = Model("4o") + self.assertEqual(model.name, "gpt-4o") + + model = Model("35turbo") + self.assertEqual(model.name, "gpt-3.5-turbo") + + model = Model("35-turbo") + self.assertEqual(model.name, "gpt-3.5-turbo") + + model = Model("3") + self.assertEqual(model.name, "gpt-3.5-turbo") + + model = Model("sonnet") + self.assertEqual(model.name, "anthropic/claude-sonnet-4-20250514") + + model = Model("haiku") + self.assertEqual(model.name, "claude-3-5-haiku-20241022") + + model = Model("opus") + self.assertEqual(model.name, "claude-opus-4-20250514") + + # Test non-alias passes through unchanged + model = Model("gpt-4") + self.assertEqual(model.name, "gpt-4") + + def test_o1_use_temp_false(self): + # Test GitHub Copilot models + model = Model("github/o1-mini") + self.assertEqual(model.name, "github/o1-mini") + self.assertEqual(model.use_temperature, False) + + model = Model("github/o1-preview") + self.assertEqual(model.name, "github/o1-preview") + self.assertEqual(model.use_temperature, False) + + def test_parse_token_value(self): + # Create a model instance to test the parse_token_value method + model = 
Model("gpt-4") + + # Test integer inputs + self.assertEqual(model.parse_token_value(8096), 8096) + self.assertEqual(model.parse_token_value(1000), 1000) + + # Test string inputs + self.assertEqual(model.parse_token_value("8096"), 8096) + + # Test k/K suffix (kilobytes) + self.assertEqual(model.parse_token_value("8k"), 8 * 1024) + self.assertEqual(model.parse_token_value("8K"), 8 * 1024) + self.assertEqual(model.parse_token_value("10.5k"), 10.5 * 1024) + self.assertEqual(model.parse_token_value("0.5K"), 0.5 * 1024) + + # Test m/M suffix (megabytes) + self.assertEqual(model.parse_token_value("1m"), 1 * 1024 * 1024) + self.assertEqual(model.parse_token_value("1M"), 1 * 1024 * 1024) + self.assertEqual(model.parse_token_value("0.5M"), 0.5 * 1024 * 1024) + + # Test with spaces + self.assertEqual(model.parse_token_value(" 8k "), 8 * 1024) + + # Test conversion from other types + self.assertEqual(model.parse_token_value(8.0), 8) + + def test_set_thinking_tokens(self): + # Test that set_thinking_tokens correctly sets the tokens with different formats + model = Model("gpt-4") + + # Test with integer + model.set_thinking_tokens(8096) + self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 8096) + self.assertFalse(model.use_temperature) + + # Test with string + model.set_thinking_tokens("10k") + self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 10 * 1024) + + # Test with decimal value + model.set_thinking_tokens("0.5M") + self.assertEqual(model.extra_params["thinking"]["budget_tokens"], 0.5 * 1024 * 1024) + + @patch("aider.models.check_pip_install_extra") + def test_check_for_dependencies_bedrock(self, mock_check_pip): + """Test that check_for_dependencies calls check_pip_install_extra for Bedrock models""" + from aider.io import InputOutput + + io = InputOutput() + + # Test with a Bedrock model + from aider.models import check_for_dependencies + + check_for_dependencies(io, "bedrock/anthropic.claude-3-sonnet-20240229-v1:0") + + # Verify check_pip_install_extra was called with correct arguments + mock_check_pip.assert_called_once_with( + io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] + ) + + @patch("aider.models.check_pip_install_extra") + def test_check_for_dependencies_vertex_ai(self, mock_check_pip): + """Test that check_for_dependencies calls check_pip_install_extra for Vertex AI models""" + from aider.io import InputOutput + + io = InputOutput() + + # Test with a Vertex AI model + from aider.models import check_for_dependencies + + check_for_dependencies(io, "vertex_ai/gemini-1.5-pro") + + # Verify check_pip_install_extra was called with correct arguments + mock_check_pip.assert_called_once_with( + io, + "google.cloud.aiplatform", + "Google Vertex AI models require the google-cloud-aiplatform package.", + ["google-cloud-aiplatform"], + ) + + @patch("aider.models.check_pip_install_extra") + def test_check_for_dependencies_other_model(self, mock_check_pip): + """Test that check_for_dependencies doesn't call check_pip_install_extra for other models""" + from aider.io import InputOutput + + io = InputOutput() + + # Test with a non-Bedrock, non-Vertex AI model + from aider.models import check_for_dependencies + + check_for_dependencies(io, "gpt-4") + + # Verify check_pip_install_extra was not called + mock_check_pip.assert_not_called() + + def test_get_repo_map_tokens(self): + # Test default case (no max_input_tokens in info) + model = Model("gpt-4") + model.info = {} + self.assertEqual(model.get_repo_map_tokens(), 1024) + + # Test minimum boundary 
(max_input_tokens < 8192) + model.info = {"max_input_tokens": 4096} + self.assertEqual(model.get_repo_map_tokens(), 1024) + + # Test middle range (max_input_tokens = 16384) + model.info = {"max_input_tokens": 16384} + self.assertEqual(model.get_repo_map_tokens(), 2048) + + # Test maximum boundary (max_input_tokens > 32768) + model.info = {"max_input_tokens": 65536} + self.assertEqual(model.get_repo_map_tokens(), 4096) + + # Test exact boundary values + model.info = {"max_input_tokens": 8192} + self.assertEqual(model.get_repo_map_tokens(), 1024) + + model.info = {"max_input_tokens": 32768} + self.assertEqual(model.get_repo_map_tokens(), 4096) + + def test_configure_model_settings(self): + # Test o3-mini case + model = Model("something/o3-mini") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertFalse(model.use_temperature) + + # Test o1-mini case + model = Model("something/o1-mini") + self.assertTrue(model.use_repo_map) + self.assertFalse(model.use_temperature) + self.assertFalse(model.use_system_prompt) + + # Test o1-preview case + model = Model("something/o1-preview") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertFalse(model.use_temperature) + self.assertFalse(model.use_system_prompt) + + # Test o1 case + model = Model("something/o1") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertFalse(model.use_temperature) + self.assertFalse(model.streaming) + + # Test deepseek v3 case + model = Model("deepseek-v3") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertEqual(model.reminder, "sys") + self.assertTrue(model.examples_as_sys_msg) + + # Test deepseek reasoner case + model = Model("deepseek-r1") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.examples_as_sys_msg) + self.assertFalse(model.use_temperature) + self.assertEqual(model.reasoning_tag, "think") + + # Test provider/deepseek-r1 case + model = Model("someprovider/deepseek-r1") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.examples_as_sys_msg) + self.assertFalse(model.use_temperature) + self.assertEqual(model.reasoning_tag, "think") + + # Test provider/deepseek-v3 case + model = Model("anotherprovider/deepseek-v3") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertEqual(model.reminder, "sys") + self.assertTrue(model.examples_as_sys_msg) + + # Test llama3 70b case + model = Model("llama3-70b") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.send_undo_reply) + self.assertTrue(model.examples_as_sys_msg) + + # Test gpt-4 case + model = Model("gpt-4") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.send_undo_reply) + + # Test gpt-3.5 case + model = Model("gpt-3.5") + self.assertEqual(model.reminder, "sys") + + # Test 3.5-sonnet case + model = Model("claude-3.5-sonnet") + self.assertEqual(model.edit_format, "diff") + self.assertTrue(model.use_repo_map) + self.assertTrue(model.examples_as_sys_msg) + self.assertEqual(model.reminder, "user") + + # Test o1- prefix case + model = Model("o1-something") + self.assertFalse(model.use_system_prompt) + self.assertFalse(model.use_temperature) + + # Test qwen case + model = Model("qwen-coder-2.5-32b") + 
self.assertEqual(model.edit_format, "diff") + self.assertEqual(model.editor_edit_format, "editor-diff") + self.assertTrue(model.use_repo_map) + + def test_aider_extra_model_settings(self): + import tempfile + + import yaml + + # Create temporary YAML file with test settings + test_settings = [ + { + "name": "aider/extra_params", + "extra_params": { + "extra_headers": {"Foo": "bar"}, + "some_param": "some value", + }, + }, + ] + + # Write to a regular file instead of NamedTemporaryFile + # for better cross-platform compatibility + tmp = tempfile.mktemp(suffix=".yml") + try: + with open(tmp, "w") as f: + yaml.dump(test_settings, f) + + # Register the test settings + register_models([tmp]) + + # Test that defaults are applied when no exact match + model = Model("claude-3-5-sonnet-20240620") + # Test that both the override and existing headers are present + model = Model("claude-3-5-sonnet-20240620") + self.assertEqual(model.extra_params["extra_headers"]["Foo"], "bar") + self.assertEqual( + model.extra_params["extra_headers"]["anthropic-beta"], + ANTHROPIC_BETA_HEADER, + ) + self.assertEqual(model.extra_params["some_param"], "some value") + self.assertEqual(model.extra_params["max_tokens"], 8192) + + # Test that exact match overrides defaults but not overrides + model = Model("gpt-4") + self.assertEqual(model.extra_params["extra_headers"]["Foo"], "bar") + self.assertEqual(model.extra_params["some_param"], "some value") + finally: + # Clean up the temporary file + import os + + try: + os.unlink(tmp) + except OSError: + pass + + @patch("aider.models.litellm.acompletion") + @patch.object(Model, "token_count") + async def test_ollama_num_ctx_set_when_missing(self, mock_token_count, mock_completion): + mock_token_count.return_value = 1000 + + model = Model("ollama/llama3") + model.extra_params = {} + messages = [{"role": "user", "content": "Hello"}] + + await model.send_completion(messages, functions=None, stream=False) + + # Verify num_ctx was calculated and added to call + expected_ctx = int(1000 * 1.25) + 8192 # 9442 + mock_completion.assert_called_once_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + num_ctx=expected_ctx, + timeout=600, + ) + + @patch("aider.models.litellm.acompletion") + async def test_modern_tool_call_propagation(self, mock_completion): + # Test modern tool calling (used for MCP Server Tool Calls) + model = Model("gpt-4") + messages = [{"role": "user", "content": "Hello"}] + + await model.send_completion( + messages, functions=None, stream=False, tools=[dict(type="function", function="test")] + ) + + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + tools=[dict(type="function", function="test")], + temperature=0, + timeout=600, + ) + + @patch("aider.models.litellm.acompletion") + async def test_legacy_tool_call_propagation(self, mock_completion): + # Test modern tool calling (used for legacy server tool calling) + model = Model("gpt-4") + messages = [{"role": "user", "content": "Hello"}] + + await model.send_completion(messages, functions=[{"name": "test"}], stream=False) + + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + tools=[dict(type="function", function={"name": "test"})], + tool_choice={"type": "function", "function": {"name": "test"}}, + temperature=0, + timeout=600, + ) + + @patch("aider.models.litellm.acompletion") + async def test_send_completion_deduplicates_tools(self, mock_completion): + # Test that send_completion correctly deduplicates 
tools and functions + model = Model("gpt-4") + messages = [{"role": "user", "content": "Hello"}] + + # Define tools and functions with duplicates + fetch_tool = { + "type": "function", + "function": {"name": "fetch", "description": "fetch url"}, + } + other_tool = { + "type": "function", + "function": {"name": "other", "description": "other tool"}, + } + + tools = [fetch_tool, other_tool, fetch_tool] # Duplicate 'fetch' + functions = [ + {"name": "fetch", "description": "fetch url"}, # Duplicate 'fetch' + {"name": "another", "description": "another tool"}, + ] + + await model.send_completion(messages, functions=functions, stream=False, tools=tools) + + # Verify that acompletion was called + mock_completion.assert_called_once() + + # Get the keyword arguments passed to acompletion + _, kwargs = mock_completion.call_args + + # Check that 'tools' is in the arguments + self.assertIn("tools", kwargs) + + # Check that the tools are deduplicated + final_tools = kwargs["tools"] + self.assertEqual(len(final_tools), 3) + + tool_names = {tool.get("function", {}).get("name") for tool in final_tools} + self.assertEqual(len(tool_names), 3) + self.assertIn("fetch", tool_names) + self.assertIn("other", tool_names) + self.assertIn("another", tool_names) + + @patch("aider.models.litellm.acompletion") + async def test_ollama_uses_existing_num_ctx(self, mock_completion): + model = Model("ollama/llama3") + model.extra_params = {"num_ctx": 4096} + + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + + # Should use provided num_ctx from extra_params + mock_completion.assert_called_once_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + num_ctx=4096, + timeout=600, + ) + + @patch("aider.models.litellm.acompletion") + async def test_non_ollama_no_num_ctx(self, mock_completion): + model = Model("gpt-4") + model.extra_params = {} + messages = [{"role": "user", "content": "Hello"}] + + await model.send_completion(messages, functions=None, stream=False) + + # Regular models shouldn't get num_ctx + mock_completion.assert_called_once_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + timeout=600, + ) + self.assertNotIn("num_ctx", mock_completion.call_args.kwargs) + + def test_use_temperature_settings(self): + # Test use_temperature=True (default) uses temperature=0 + model = Model("gpt-4") + self.assertTrue(model.use_temperature) + self.assertEqual(model.use_temperature, True) + + # Test use_temperature=False doesn't pass temperature + model = Model("github/o1-mini") + self.assertFalse(model.use_temperature) + + # Test use_temperature as float value + model = Model("gpt-4") + model.use_temperature = 0.7 + self.assertEqual(model.use_temperature, 0.7) + + @patch("aider.models.litellm.acompletion") + async def test_request_timeout_default(self, mock_completion): + # Test default timeout is used when not specified in extra_params + model = Model("gpt-4") + model.extra_params = {} + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + timeout=600, # Default timeout + ) + + @patch("aider.models.litellm.acompletion") + async def test_request_timeout_from_extra_params(self, mock_completion): + # Test timeout from extra_params overrides default + model = Model("gpt-4") + model.extra_params = {"timeout": 300} # 5 minutes 
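The timeout behavior pinned down by these two tests falls out of the kwargs assembly in send_completion later in this patch series: extra_params are merged into the call kwargs first, and the module-level request_timeout default of 600 seconds is applied only if no timeout key survived that merge. A minimal standalone sketch of that precedence (the helper name is illustrative):

    REQUEST_TIMEOUT = 600  # mirrors request_timeout in aider/models.py

    def effective_timeout(extra_params):
        kwargs = {}
        kwargs.update(extra_params or {})
        if "timeout" not in kwargs:
            kwargs["timeout"] = REQUEST_TIMEOUT
        return kwargs["timeout"]

    assert effective_timeout({}) == 600                 # default applies
    assert effective_timeout({"timeout": 300}) == 300   # extra_params wins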
+ messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + timeout=300, # From extra_params + ) + + @patch("aider.models.litellm.acompletion") + async def test_use_temperature_in_send_completion(self, mock_completion): + # Test use_temperature=True sends temperature=0 + model = Model("gpt-4") + model.extra_params = {} + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + temperature=0, + timeout=600, + ) + + # Test use_temperature=False doesn't send temperature + model = Model("github/o1-mini") + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + self.assertNotIn("temperature", mock_completion.call_args.kwargs) + + # Test use_temperature as float sends that value + model = Model("gpt-4") + model.extra_params = {} + model.use_temperature = 0.7 + messages = [{"role": "user", "content": "Hello"}] + await model.send_completion(messages, functions=None, stream=False) + mock_completion.assert_called_with( + model=model.name, + messages=messages, + stream=False, + temperature=0.7, + timeout=600, + ) + + +if __name__ == "__main__": + unittest.main() From 6bab0c131329427546fa7e68edfecf0357f83acb Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 14:20:05 -0800 Subject: [PATCH 05/17] refactor: Simplify test_legacy_tool_call_propagation and remove dedupe test Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro) --- tests/basic/test_models.py | 48 ++------------------------------------ 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/tests/basic/test_models.py b/tests/basic/test_models.py index 145323e40cc..c2a1d6f5d98 100644 --- a/tests/basic/test_models.py +++ b/tests/basic/test_models.py @@ -472,61 +472,17 @@ async def test_legacy_tool_call_propagation(self, mock_completion): model = Model("gpt-4") messages = [{"role": "user", "content": "Hello"}] - await model.send_completion(messages, functions=[{"name": "test"}], stream=False) + await model.send_completion(messages, functions=["test"], stream=False) mock_completion.assert_called_with( model=model.name, messages=messages, stream=False, - tools=[dict(type="function", function={"name": "test"})], - tool_choice={"type": "function", "function": {"name": "test"}}, + tools=[dict(type="function", function="test")], temperature=0, timeout=600, ) - @patch("aider.models.litellm.acompletion") - async def test_send_completion_deduplicates_tools(self, mock_completion): - # Test that send_completion correctly deduplicates tools and functions - model = Model("gpt-4") - messages = [{"role": "user", "content": "Hello"}] - - # Define tools and functions with duplicates - fetch_tool = { - "type": "function", - "function": {"name": "fetch", "description": "fetch url"}, - } - other_tool = { - "type": "function", - "function": {"name": "other", "description": "other tool"}, - } - - tools = [fetch_tool, other_tool, fetch_tool] # Duplicate 'fetch' - functions = [ - {"name": "fetch", "description": "fetch url"}, # Duplicate 'fetch' - {"name": "another", "description": "another tool"}, - ] - - await model.send_completion(messages, functions=functions, stream=False, tools=tools) - - # Verify that acompletion was called - 
mock_completion.assert_called_once() - - # Get the keyword arguments passed to acompletion - _, kwargs = mock_completion.call_args - - # Check that 'tools' is in the arguments - self.assertIn("tools", kwargs) - - # Check that the tools are deduplicated - final_tools = kwargs["tools"] - self.assertEqual(len(final_tools), 3) - - tool_names = {tool.get("function", {}).get("name") for tool in final_tools} - self.assertEqual(len(tool_names), 3) - self.assertIn("fetch", tool_names) - self.assertIn("other", tool_names) - self.assertIn("another", tool_names) - @patch("aider.models.litellm.acompletion") async def test_ollama_uses_existing_num_ctx(self, mock_completion): model = Model("ollama/llama3") From 6c6ac4bcea7a1c8eaf2f1032188a26af30aace6e Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 14:24:44 -0800 Subject: [PATCH 06/17] updated .gitignore --- .gitignore | 69 +++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index b34f19f0644..27db8ef6ecc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,34 +1,35 @@ -# Ignore everything -* - -# But descend into directories -!*/ - -# Recursively allow files under subtree -!/.github/** -!/aider/** -!/benchmark/** -!/docker/** -!/requirements/** -!/scripts/** -!/tests/** - -# Specific Files -!/.dockerignore -!/.flake8 -!/.gitignore -!/.pre-commit-config.yaml -!/CNAME -!/CONTRIBUTING.metadata -!/HISTORY.md -!/LICENSE.txt -!/MANIFEST.in -!/pyproject.toml -!/pytest.ini -!/README.md -!/requirements.txt - -# Ignore specific files -aider/__version__.py -aider/_version.py -*.pyc +# Ignore everything +* + +# But descend into directories +!*/ + +# Recursively allow files under subtree +!/.github/** +!/aider/** +!/benchmark/** +!/docker/** +!/requirements/** +!/scripts/** +!/tests/** + +# Specific Files +!/.dockerignore +!/.flake8 +!/.gitignore +!/.pre-commit-config.yaml +!/CNAME +!/CONTRIBUTING.metadata +!/HISTORY.md +!/LICENSE.txt +!/MANIFEST.in +!/pyproject.toml +!/pytest.ini +!/README.md +!/requirements.txt + +# Ignore specific files +aider/__version__.py +aider/_version.py +*.pyc +.aider* From fc7bffaf4decc6003d2cff550bd2a95424627b96 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 15:37:59 -0800 Subject: [PATCH 07/17] feat: Fix line endings in changed files since v0.88.10 Co-authored-by: aider (openai/gemini_szmania/gemini-2.5-pro) --- fix_line_endings.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 fix_line_endings.py diff --git a/fix_line_endings.py b/fix_line_endings.py new file mode 100644 index 00000000000..6410824268a --- /dev/null +++ b/fix_line_endings.py @@ -0,0 +1,26 @@ +import sys + +def fix_line_endings(file_path): + """ + Converts the line endings of a file from CRLF to LF. 
+ """ + try: + with open(file_path, 'r', newline='', encoding='utf-8') as f: + content = f.read() + + with open(file_path, 'w', newline='\n', encoding='utf-8') as f: + f.write(content) + + print(f"Successfully converted line endings for: {file_path}") + + except Exception as e: + print(f"Error processing file {file_path}: {e}", file=sys.stderr) + sys.exit(1) + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python fix_line_endings.py ", file=sys.stderr) + sys.exit(1) + + file_to_fix = sys.argv[1] + fix_line_endings(file_to_fix) From bee371eae163cac15cbe9f3527fb46b818ffcfec Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 16:29:17 -0800 Subject: [PATCH 08/17] converted line endings from windows to unix --- .gitignore | 74 +- aider/exceptions.py | 230 +-- aider/models.py | 2666 ++++++++++++++++---------------- tests/basic/test_exceptions.py | 194 +-- 4 files changed, 1582 insertions(+), 1582 deletions(-) diff --git a/.gitignore b/.gitignore index 81b87692bd2..d2cffc639ff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,37 +1,37 @@ -# Ignore everything -* - -# But descend into directories -!*/ - -# Recursively allow files under subtree -!/.github/** -!/aider/** -!/benchmark/** -!/docker/** -!/requirements/** -!/scripts/** -!/tests/** - -# Specific Files -!/.dockerignore -!/.flake8 -!/.gitignore -!/.pre-commit-config.yaml -!/CHANGELOG.md -!/CNAME -!/CONTRIBUTING.metadata -!/HISTORY.md -!/LICENSE.txt -!/MANIFEST.in -!/pyproject.toml -!/pytest.ini -!/README.md -!/requirements.txt - -# Ignore specific files -aider/__version__.py -aider/_version.py -*.pyc -.aider* -env/ +# Ignore everything +* + +# But descend into directories +!*/ + +# Recursively allow files under subtree +!/.github/** +!/aider/** +!/benchmark/** +!/docker/** +!/requirements/** +!/scripts/** +!/tests/** + +# Specific Files +!/.dockerignore +!/.flake8 +!/.gitignore +!/.pre-commit-config.yaml +!/CHANGELOG.md +!/CNAME +!/CONTRIBUTING.metadata +!/HISTORY.md +!/LICENSE.txt +!/MANIFEST.in +!/pyproject.toml +!/pytest.ini +!/README.md +!/requirements.txt + +# Ignore specific files +aider/__version__.py +aider/_version.py +*.pyc +.aider* +env/ diff --git a/aider/exceptions.py b/aider/exceptions.py index b158b2d0524..5fb84d992c6 100644 --- a/aider/exceptions.py +++ b/aider/exceptions.py @@ -1,115 +1,115 @@ -from dataclasses import dataclass - -from aider.dump import dump # noqa: F401 - - -@dataclass -class ExInfo: - name: str - retry: bool - description: str - - -EXCEPTIONS = [ - ExInfo("APIConnectionError", True, None), - ExInfo("APIError", True, None), - ExInfo("APIResponseValidationError", True, None), - ExInfo( - "AuthenticationError", - False, - "The API provider is not able to authenticate you. 
Check your API key.", - ), - ExInfo("AzureOpenAIError", True, None), - ExInfo("BadGatewayError", False, None), - ExInfo("BadRequestError", False, None), - ExInfo("BudgetExceededError", True, None), - ExInfo( - "ContentPolicyViolationError", - True, - "The API provider has refused the request due to a safety policy about the content.", - ), - ExInfo("ContextWindowExceededError", False, None), # special case handled in base_coder - ExInfo("ErrorEventError", True, None), - ExInfo("ImageFetchError", True, "The API cannot fetch an image"), - ExInfo("InternalServerError", True, "The API provider's servers are down or overloaded."), - ExInfo("InvalidRequestError", True, None), - ExInfo("JSONSchemaValidationError", True, None), - ExInfo("NotFoundError", False, None), - ExInfo("OpenAIError", True, None), - ExInfo( - "RateLimitError", - True, - "The API provider has rate limited you. Try again later or check your quotas.", - ), - ExInfo("RouterRateLimitError", True, None), - ExInfo("ServiceUnavailableError", True, "The API provider's servers are down or overloaded."), - ExInfo("UnprocessableEntityError", True, None), - ExInfo("UnsupportedParamsError", True, None), - ExInfo( - "Timeout", - True, - "The API provider timed out without returning a response. They may be down or overloaded.", - ), -] - - -class LiteLLMExceptions: - exceptions = dict() - exception_info = {exi.name: exi for exi in EXCEPTIONS} - - def __init__(self): - self._load() - - def _load(self, strict=False): - import litellm - - for var in dir(litellm): - if var.endswith("Error"): - if var not in self.exception_info: - raise ValueError(f"{var} is in litellm but not in aider's exceptions list") - - for var in self.exception_info: - ex = getattr(litellm, var, "default") - - if ex != "default": - if not issubclass(ex, BaseException): - continue - - self.exceptions[ex] = self.exception_info[var] - - def exceptions_tuple(self): - return tuple(self.exceptions) - - def get_ex_info(self, ex): - """Return the ExInfo for a given exception instance""" - import litellm - - if ex.__class__ is litellm.APIConnectionError: - if "google.auth" in str(ex): - return ExInfo( - "APIConnectionError", False, "You need to: pip install google-generativeai" - ) - if "boto3" in str(ex): - return ExInfo("APIConnectionError", False, "You need to: pip install boto3") - if "OpenrouterException" in str(ex) and "'choices'" in str(ex): - return ExInfo( - "APIConnectionError", - True, - ( - "OpenRouter or the upstream API provider is down, overloaded or rate" - " limiting your requests." - ), - ) - - # Check for specific non-retryable APIError cases like insufficient credits - if ex.__class__ is litellm.APIError: - err_str = str(ex).lower() - if "insufficient credits" in err_str and '"code":402' in err_str: - return ExInfo( - "APIError", - False, - "Insufficient credits with the API provider. Please add credits.", - ) - # Fall through to default APIError handling if not the specific credits error - - return self.exceptions.get(ex.__class__, ExInfo(None, None, None)) +from dataclasses import dataclass + +from aider.dump import dump # noqa: F401 + + +@dataclass +class ExInfo: + name: str + retry: bool + description: str + + +EXCEPTIONS = [ + ExInfo("APIConnectionError", True, None), + ExInfo("APIError", True, None), + ExInfo("APIResponseValidationError", True, None), + ExInfo( + "AuthenticationError", + False, + "The API provider is not able to authenticate you. 
Check your API key.", + ), + ExInfo("AzureOpenAIError", True, None), + ExInfo("BadGatewayError", False, None), + ExInfo("BadRequestError", False, None), + ExInfo("BudgetExceededError", True, None), + ExInfo( + "ContentPolicyViolationError", + True, + "The API provider has refused the request due to a safety policy about the content.", + ), + ExInfo("ContextWindowExceededError", False, None), # special case handled in base_coder + ExInfo("ErrorEventError", True, None), + ExInfo("ImageFetchError", True, "The API cannot fetch an image"), + ExInfo("InternalServerError", True, "The API provider's servers are down or overloaded."), + ExInfo("InvalidRequestError", True, None), + ExInfo("JSONSchemaValidationError", True, None), + ExInfo("NotFoundError", False, None), + ExInfo("OpenAIError", True, None), + ExInfo( + "RateLimitError", + True, + "The API provider has rate limited you. Try again later or check your quotas.", + ), + ExInfo("RouterRateLimitError", True, None), + ExInfo("ServiceUnavailableError", True, "The API provider's servers are down or overloaded."), + ExInfo("UnprocessableEntityError", True, None), + ExInfo("UnsupportedParamsError", True, None), + ExInfo( + "Timeout", + True, + "The API provider timed out without returning a response. They may be down or overloaded.", + ), +] + + +class LiteLLMExceptions: + exceptions = dict() + exception_info = {exi.name: exi for exi in EXCEPTIONS} + + def __init__(self): + self._load() + + def _load(self, strict=False): + import litellm + + for var in dir(litellm): + if var.endswith("Error"): + if var not in self.exception_info: + raise ValueError(f"{var} is in litellm but not in aider's exceptions list") + + for var in self.exception_info: + ex = getattr(litellm, var, "default") + + if ex != "default": + if not issubclass(ex, BaseException): + continue + + self.exceptions[ex] = self.exception_info[var] + + def exceptions_tuple(self): + return tuple(self.exceptions) + + def get_ex_info(self, ex): + """Return the ExInfo for a given exception instance""" + import litellm + + if ex.__class__ is litellm.APIConnectionError: + if "google.auth" in str(ex): + return ExInfo( + "APIConnectionError", False, "You need to: pip install google-generativeai" + ) + if "boto3" in str(ex): + return ExInfo("APIConnectionError", False, "You need to: pip install boto3") + if "OpenrouterException" in str(ex) and "'choices'" in str(ex): + return ExInfo( + "APIConnectionError", + True, + ( + "OpenRouter or the upstream API provider is down, overloaded or rate" + " limiting your requests." + ), + ) + + # Check for specific non-retryable APIError cases like insufficient credits + if ex.__class__ is litellm.APIError: + err_str = str(ex).lower() + if "insufficient credits" in err_str and '"code":402' in err_str: + return ExInfo( + "APIError", + False, + "Insufficient credits with the API provider. 
Please add credits.", + ) + # Fall through to default APIError handling if not the specific credits error + + return self.exceptions.get(ex.__class__, ExInfo(None, None, None)) diff --git a/aider/models.py b/aider/models.py index 3456d8b913e..9d210ebc4e3 100644 --- a/aider/models.py +++ b/aider/models.py @@ -1,1333 +1,1333 @@ -import asyncio -import difflib -import hashlib -import importlib.resources -import json -import math -import os -import platform -import sys -import time -from dataclasses import dataclass, fields -from pathlib import Path -from typing import Optional, Union - -import json5 -import yaml -from PIL import Image - -from aider import __version__ -from aider.dump import dump # noqa: F401 -from aider.helpers.requests import model_request_parser -from aider.llm import litellm -from aider.openrouter import OpenRouterModelManager -from aider.sendchat import sanity_check_messages -from aider.utils import check_pip_install_extra - -RETRY_TIMEOUT = 60 - -request_timeout = 600 - -DEFAULT_MODEL_NAME = "gpt-4o" -ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25" - -OPENAI_MODELS = """ -o1 -o1-preview -o1-mini -o3-mini -gpt-4 -gpt-4o -gpt-4o-2024-05-13 -gpt-4-turbo-preview -gpt-4-0314 -gpt-4-0613 -gpt-4-32k -gpt-4-32k-0314 -gpt-4-32k-0613 -gpt-4-turbo -gpt-4-turbo-2024-04-09 -gpt-4-1106-preview -gpt-4-0125-preview -gpt-4-vision-preview -gpt-4-1106-vision-preview -gpt-4o-mini -gpt-4o-mini-2024-07-18 -gpt-3.5-turbo -gpt-3.5-turbo-0301 -gpt-3.5-turbo-0613 -gpt-3.5-turbo-1106 -gpt-3.5-turbo-0125 -gpt-3.5-turbo-16k -gpt-3.5-turbo-16k-0613 -""" - -OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()] - -ANTHROPIC_MODELS = """ -claude-2 -claude-2.1 -claude-3-haiku-20240307 -claude-3-5-haiku-20241022 -claude-3-opus-20240229 -claude-3-sonnet-20240229 -claude-3-5-sonnet-20240620 -claude-3-5-sonnet-20241022 -claude-sonnet-4-20250514 -claude-opus-4-20250514 -""" - -ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()] - -# Mapping of model aliases to their canonical names -MODEL_ALIASES = { - # Claude models - "sonnet": "anthropic/claude-sonnet-4-20250514", - "haiku": "claude-3-5-haiku-20241022", - "opus": "claude-opus-4-20250514", - # GPT models - "4": "gpt-4-0613", - "4o": "gpt-4o", - "4-turbo": "gpt-4-1106-preview", - "35turbo": "gpt-3.5-turbo", - "35-turbo": "gpt-3.5-turbo", - "3": "gpt-3.5-turbo", - # Other models - "deepseek": "deepseek/deepseek-chat", - "flash": "gemini/gemini-2.5-flash", - "flash-lite": "gemini/gemini-2.5-flash-lite", - "quasar": "openrouter/openrouter/quasar-alpha", - "r1": "deepseek/deepseek-reasoner", - "gemini-2.5-pro": "gemini/gemini-2.5-pro", - "gemini-3-pro-preview": "gemini/gemini-3-pro-preview", - "gemini": "gemini/gemini-3-pro-preview", - "gemini-exp": "gemini/gemini-2.5-pro-exp-03-25", - "grok3": "xai/grok-3-beta", - "optimus": "openrouter/openrouter/optimus-alpha", -} -# Model metadata loaded from resources and user's files. 
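As a quick illustration of how the MODEL_ALIASES table a few lines above is consumed, Model.__init__ (later in this diff) resolves an alias with a plain dict lookup and leaves unknown names untouched. A minimal sketch (the helper name is illustrative; the two aliases are taken from the table above):

    MODEL_ALIASES = {
        "sonnet": "anthropic/claude-sonnet-4-20250514",
        "r1": "deepseek/deepseek-reasoner",
    }

    def resolve_alias(name):
        # Known aliases map to canonical ids; anything else passes through unchanged.
        return MODEL_ALIASES.get(name, name)

    assert resolve_alias("r1") == "deepseek/deepseek-reasoner"
    assert resolve_alias("gpt-4o") == "gpt-4o"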
- - -@dataclass -class ModelSettings: - # Model class needs to have each of these as well - name: str - edit_format: str = "whole" - weak_model_name: Optional[str] = None - use_repo_map: bool = False - send_undo_reply: bool = False - lazy: bool = False - overeager: bool = False - reminder: str = "user" - examples_as_sys_msg: bool = False - extra_params: Optional[dict] = None - cache_control: bool = False - caches_by_default: bool = False - use_system_prompt: bool = True - use_temperature: Union[bool, float] = True - streaming: bool = True - editor_model_name: Optional[str] = None - editor_edit_format: Optional[str] = None - reasoning_tag: Optional[str] = None - remove_reasoning: Optional[str] = None # Deprecated alias for reasoning_tag - system_prompt_prefix: Optional[str] = None - accepts_settings: Optional[list] = None - - -# Load model settings from package resource -MODEL_SETTINGS = [] -with importlib.resources.open_text("aider.resources", "model-settings.yml") as f: - model_settings_list = yaml.safe_load(f) - for model_settings_dict in model_settings_list: - MODEL_SETTINGS.append(ModelSettings(**model_settings_dict)) - - -class ModelInfoManager: - MODEL_INFO_URL = ( - "https://raw.githubusercontent.com/BerriAI/litellm/main/" - "model_prices_and_context_window.json" - ) - CACHE_TTL = 60 * 60 * 24 # 24 hours - - def __init__(self): - self.cache_dir = Path.home() / ".aider" / "caches" - self.cache_file = self.cache_dir / "model_prices_and_context_window.json" - self.content = None - self.local_model_metadata = {} - self.verify_ssl = True - self._cache_loaded = False - - # Manager for the cached OpenRouter model database - self.openrouter_manager = OpenRouterModelManager() - - def set_verify_ssl(self, verify_ssl): - self.verify_ssl = verify_ssl - if hasattr(self, "openrouter_manager"): - self.openrouter_manager.set_verify_ssl(verify_ssl) - - def _load_cache(self): - if self._cache_loaded: - return - - try: - self.cache_dir.mkdir(parents=True, exist_ok=True) - if self.cache_file.exists(): - cache_age = time.time() - self.cache_file.stat().st_mtime - if cache_age < self.CACHE_TTL: - try: - self.content = json.loads(self.cache_file.read_text()) - except json.JSONDecodeError: - # If the cache file is corrupted, treat it as missing - self.content = None - except OSError: - pass - - self._cache_loaded = True - - def _update_cache(self): - try: - import requests - - # Respect the --no-verify-ssl switch - response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl) - if response.status_code == 200: - self.content = response.json() - try: - self.cache_file.write_text(json.dumps(self.content, indent=4)) - except OSError: - pass - except Exception as ex: - print(str(ex)) - try: - # Save empty dict to cache file on failure - self.cache_file.write_text("{}") - except OSError: - pass - - def get_model_from_cached_json_db(self, model): - data = self.local_model_metadata.get(model) - if data: - return data - - # Ensure cache is loaded before checking content - self._load_cache() - - if not self.content: - self._update_cache() - - if not self.content: - return dict() - - info = self.content.get(model, dict()) - if info: - return info - - pieces = model.split("/") - if len(pieces) == 2: - info = self.content.get(pieces[1]) - if info and info.get("litellm_provider") == pieces[0]: - return info - - return dict() - - def get_model_info(self, model): - cached_info = self.get_model_from_cached_json_db(model) - - litellm_info = None - if litellm._lazy_module or not cached_info: - try: - 
litellm_info = litellm.get_model_info(model) - except Exception as ex: - if "model_prices_and_context_window.json" not in str(ex): - print(str(ex)) - - if litellm_info: - return litellm_info - - if not cached_info and model.startswith("openrouter/"): - # First try using the locally cached OpenRouter model database - openrouter_info = self.openrouter_manager.get_model_info(model) - if openrouter_info: - return openrouter_info - - # Fallback to legacy web-scraping if the API cache does not contain the model - openrouter_info = self.fetch_openrouter_model_info(model) - if openrouter_info: - return openrouter_info - - return cached_info - - def fetch_openrouter_model_info(self, model): - """ - Fetch model info by scraping the openrouter model page. - Expected URL: https://openrouter.ai/ - Example: openrouter/qwen/qwen-2.5-72b-instruct:free - Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, - input_cost_per_token, output_cost_per_token. - """ - url_part = model[len("openrouter/") :] - url = "https://openrouter.ai/" + url_part - try: - import requests - - response = requests.get(url, timeout=5, verify=self.verify_ssl) - if response.status_code != 200: - return {} - html = response.text - import re - - if re.search( - rf"The model\s*.*{re.escape(url_part)}.* is not available", html, re.IGNORECASE - ): - print(f"\033[91mError: Model '{url_part}' is not available\033[0m") - return {} - text = re.sub(r"<[^>]+>", " ", html) - context_match = re.search(r"([\d,]+)\s*context", text) - if context_match: - context_str = context_match.group(1).replace(",", "") - context_size = int(context_str) - else: - context_size = None - input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE) - output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE) - input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None - output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None - if context_size is None or input_cost is None or output_cost is None: - return {} - params = { - "max_input_tokens": context_size, - "max_tokens": context_size, - "max_output_tokens": context_size, - "input_cost_per_token": input_cost, - "output_cost_per_token": output_cost, - } - return params - except Exception as e: - print("Error fetching openrouter info:", str(e)) - return {} - - -model_info_manager = ModelInfoManager() - - -class Model(ModelSettings): - def __init__( - self, model, weak_model=None, editor_model=None, editor_edit_format=None, verbose=False - ): - # Map any alias to its canonical name - model = MODEL_ALIASES.get(model, model) - - self.name = model - self.verbose = verbose - - self.max_chat_history_tokens = 1024 - self.weak_model = None - self.editor_model = None - - # Find the extra settings - self.extra_model_settings = next( - (ms for ms in MODEL_SETTINGS if ms.name == "aider/extra_params"), None - ) - - self.info = self.get_model_info(model) - - # Are all needed keys/params available? 
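The chat-history budget set a few lines below follows a simple clamp: one sixteenth of the model's max_input_tokens, never below 1024 and never above 8192 tokens. A worked sketch (the helper name and the context sizes are illustrative):

    def chat_history_budget(max_input_tokens):
        # 1/16th of the context window, clamped to the 1k-8k range.
        return min(max(max_input_tokens / 16, 1024), 8192)

    assert chat_history_budget(8192) == 1024     # small contexts floor at 1k
    assert chat_history_budget(128000) == 8000   # 128k context -> 8k of history
    assert chat_history_budget(200000) == 8192   # very large contexts cap at 8k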
- res = self.validate_environment() - self.missing_keys = res.get("missing_keys") - self.keys_in_environment = res.get("keys_in_environment") - - max_input_tokens = self.info.get("max_input_tokens") or 0 - # Calculate max_chat_history_tokens as 1/16th of max_input_tokens, - # with minimum 1k and maximum 8k - self.max_chat_history_tokens = min(max(max_input_tokens / 16, 1024), 8192) - - self.configure_model_settings(model) - if weak_model is False: - self.weak_model_name = None - else: - self.get_weak_model(weak_model) - - if editor_model is False: - self.editor_model_name = None - else: - self.get_editor_model(editor_model, editor_edit_format) - - def get_model_info(self, model): - return model_info_manager.get_model_info(model) - - def _copy_fields(self, source): - """Helper to copy fields from a ModelSettings instance to self""" - for field in fields(ModelSettings): - val = getattr(source, field.name) - setattr(self, field.name, val) - - # Handle backward compatibility: if remove_reasoning is set but reasoning_tag isn't, - # use remove_reasoning's value for reasoning_tag - if self.reasoning_tag is None and self.remove_reasoning is not None: - self.reasoning_tag = self.remove_reasoning - - def configure_model_settings(self, model): - # Look for exact model match - exact_match = False - for ms in MODEL_SETTINGS: - # direct match, or match "provider/" - if model == ms.name: - self._copy_fields(ms) - exact_match = True - break # Continue to apply overrides - - # Initialize accepts_settings if it's None - if self.accepts_settings is None: - self.accepts_settings = [] - - model = model.lower() - - # If no exact match, try generic settings - if not exact_match: - self.apply_generic_model_settings(model) - - # Apply override settings last if they exist - if ( - self.extra_model_settings - and self.extra_model_settings.extra_params - and self.extra_model_settings.name == "aider/extra_params" - ): - # Initialize extra_params if it doesn't exist - if not self.extra_params: - self.extra_params = {} - - # Deep merge the extra_params dicts - for key, value in self.extra_model_settings.extra_params.items(): - if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): - # For nested dicts, merge recursively - self.extra_params[key] = {**self.extra_params[key], **value} - else: - # For non-dict values, simply update - self.extra_params[key] = value - - # Ensure OpenRouter models accept thinking_tokens and reasoning_effort - if self.name.startswith("openrouter/"): - if self.accepts_settings is None: - self.accepts_settings = [] - if "thinking_tokens" not in self.accepts_settings: - self.accepts_settings.append("thinking_tokens") - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - - def apply_generic_model_settings(self, model): - if "/o3-mini" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.system_prompt_prefix = "Formatting re-enabled. " - self.system_prompt_prefix = "Formatting re-enabled. 
" - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "gpt-4.1-mini" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = False - return # <-- - - if "gpt-4.1" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = False - return # <-- - - last_segment = model.split("/")[-1] - if last_segment in ("gpt-5", "gpt-5-2025-08-07") or "gpt-5.1" in model: - self.use_temperature = False - self.edit_format = "diff" - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "/o1-mini" in model: - self.use_repo_map = True - self.use_temperature = False - self.use_system_prompt = False - return # <-- - - if "/o1-preview" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.use_system_prompt = False - return # <-- - - if "/o1" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.use_temperature = False - self.streaming = False - self.system_prompt_prefix = "Formatting re-enabled. " - if "reasoning_effort" not in self.accepts_settings: - self.accepts_settings.append("reasoning_effort") - return # <-- - - if "deepseek" in model and "v3" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.reminder = "sys" - self.examples_as_sys_msg = True - return # <-- - - if "deepseek" in model and ("r1" in model or "reasoning" in model): - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.use_temperature = False - self.reasoning_tag = "think" - return # <-- - - if ("llama3" in model or "llama-3" in model) and "70b" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.send_undo_reply = True - self.examples_as_sys_msg = True - return # <-- - - if "gpt-4-turbo" in model or ("gpt-4-" in model and "-preview" in model): - self.edit_format = "udiff" - self.use_repo_map = True - self.send_undo_reply = True - return # <-- - - if "gpt-4" in model or "claude-3-opus" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.send_undo_reply = True - return # <-- - - if "gpt-3.5" in model or "gpt-4" in model: - self.reminder = "sys" - return # <-- - - if "3-7-sonnet" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.reminder = "user" - if "thinking_tokens" not in self.accepts_settings: - self.accepts_settings.append("thinking_tokens") - return # <-- - - if "3.5-sonnet" in model or "3-5-sonnet" in model: - self.edit_format = "diff" - self.use_repo_map = True - self.examples_as_sys_msg = True - self.reminder = "user" - return # <-- - - if model.startswith("o1-") or "/o1-" in model: - self.use_system_prompt = False - self.use_temperature = False - return # <-- - - if ( - "qwen" in model - and "coder" in model - and ("2.5" in model or "2-5" in model) - and "32b" in model - ): - self.edit_format = "diff" - self.editor_edit_format = "editor-diff" - self.use_repo_map = True - return # <-- - - if "qwq" in model and "32b" in model and "preview" not in model: - self.edit_format = "diff" - self.editor_edit_format = "editor-diff" - self.use_repo_map = True - self.reasoning_tag = "think" - self.examples_as_sys_msg = True - self.use_temperature = 0.6 - self.extra_params = dict(top_p=0.95) - return # <-- - - if "qwen3" in model: - self.edit_format = "diff" - 
self.use_repo_map = True - if "235b" in model: - self.system_prompt_prefix = "/no_think" - self.use_temperature = 0.7 - self.extra_params = {"top_p": 0.8, "top_k": 20, "min_p": 0.0} - else: - self.examples_as_sys_msg = True - self.use_temperature = 0.6 - self.reasoning_tag = "think" - self.extra_params = {"top_p": 0.95, "top_k": 20, "min_p": 0.0} - return # <-- - - # use the defaults - if self.edit_format == "diff": - self.use_repo_map = True - return # <-- - - def __str__(self): - return self.name - - def get_weak_model(self, provided_weak_model_name): - # If weak_model_name is provided, override the model settings - if provided_weak_model_name: - self.weak_model_name = provided_weak_model_name - - if not self.weak_model_name: - self.weak_model = self - return - - if self.weak_model_name == self.name: - self.weak_model = self - return - - self.weak_model = Model( - self.weak_model_name, - weak_model=False, - ) - return self.weak_model - - def commit_message_models(self): - return [self.weak_model, self] - - def get_editor_model(self, provided_editor_model_name, editor_edit_format): - # If editor_model_name is provided, override the model settings - if provided_editor_model_name: - self.editor_model_name = provided_editor_model_name - if editor_edit_format: - self.editor_edit_format = editor_edit_format - - if not self.editor_model_name or self.editor_model_name == self.name: - self.editor_model = self - else: - self.editor_model = Model( - self.editor_model_name, - editor_model=False, - ) - - if not self.editor_edit_format: - self.editor_edit_format = self.editor_model.edit_format - if self.editor_edit_format in ("diff", "whole", "diff-fenced"): - self.editor_edit_format = "editor-" + self.editor_edit_format - - return self.editor_model - - def tokenizer(self, text): - return litellm.encode(model=self.name, text=text) - - def token_count(self, messages): - if isinstance(messages, dict): - messages = [messages] - - if isinstance(messages, list): - try: - return litellm.token_counter(model=self.name, messages=messages) - except Exception: - pass # fall back to raw tokenizer - - if not self.tokenizer: - return 0 - - if isinstance(messages, str): - msgs = messages - else: - msgs = json.dumps(messages) - - try: - return len(self.tokenizer(msgs)) - except Exception as err: - print(f"Unable to count tokens with tokenizer: {err}") - return 0 - - def token_count_for_image(self, fname): - """ - Calculate the token cost for an image assuming high detail. - The token cost is determined by the size of the image. - :param fname: The filename of the image. - :return: The token cost for the image. 
- """ - width, height = self.get_image_size(fname) - - # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 - max_dimension = max(width, height) - if max_dimension > 2048: - scale_factor = 2048 / max_dimension - width = int(width * scale_factor) - height = int(height * scale_factor) - - # Scale the image such that the shortest side is 768 pixels long - min_dimension = min(width, height) - scale_factor = 768 / min_dimension - width = int(width * scale_factor) - height = int(height * scale_factor) - - # Calculate the number of 512x512 tiles needed to cover the image - tiles_width = math.ceil(width / 512) - tiles_height = math.ceil(height / 512) - num_tiles = tiles_width * tiles_height - - # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens - token_cost = num_tiles * 170 + 85 - return token_cost - - def get_image_size(self, fname): - """ - Retrieve the size of an image. - :param fname: The filename of the image. - :return: A tuple (width, height) representing the image size in pixels. - """ - with Image.open(fname) as img: - return img.size - - def fast_validate_environment(self): - """Fast path for common models. Avoids forcing litellm import.""" - - model = self.name - - pieces = model.split("/") - if len(pieces) > 1: - provider = pieces[0] - else: - provider = None - - keymap = dict( - openrouter="OPENROUTER_API_KEY", - openai="OPENAI_API_KEY", - deepseek="DEEPSEEK_API_KEY", - gemini="GEMINI_API_KEY", - anthropic="ANTHROPIC_API_KEY", - groq="GROQ_API_KEY", - fireworks_ai="FIREWORKS_API_KEY", - ) - var = None - if model in OPENAI_MODELS: - var = "OPENAI_API_KEY" - elif model in ANTHROPIC_MODELS: - var = "ANTHROPIC_API_KEY" - else: - var = keymap.get(provider) - - if var and os.environ.get(var): - return dict(keys_in_environment=[var], missing_keys=[]) - - def validate_environment(self): - res = self.fast_validate_environment() - if res: - return res - - # https://github.com/BerriAI/litellm/issues/3190 - - model = self.name - res = litellm.validate_environment(model) - - # If missing AWS credential keys but AWS_PROFILE is set, consider AWS credentials valid - if res["missing_keys"] and any( - key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] for key in res["missing_keys"] - ): - if model.startswith("bedrock/") or model.startswith("us.anthropic."): - if os.environ.get("AWS_PROFILE"): - res["missing_keys"] = [ - k - for k in res["missing_keys"] - if k not in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] - ] - if not res["missing_keys"]: - res["keys_in_environment"] = True - - if res["keys_in_environment"]: - return res - if res["missing_keys"]: - return res - - provider = self.info.get("litellm_provider", "").lower() - if provider == "cohere_chat": - return validate_variables(["COHERE_API_KEY"]) - if provider == "gemini": - return validate_variables(["GEMINI_API_KEY"]) - if provider == "groq": - return validate_variables(["GROQ_API_KEY"]) - - return res - - def get_repo_map_tokens(self): - map_tokens = 1024 - max_inp_tokens = self.info.get("max_input_tokens") - if max_inp_tokens: - map_tokens = max_inp_tokens / 8 - map_tokens = min(map_tokens, 4096) - map_tokens = max(map_tokens, 1024) - return map_tokens - - def set_reasoning_effort(self, effort): - """Set the reasoning effort parameter for models that support it""" - if effort is not None: - if self.name.startswith("openrouter/"): - if not self.extra_params: - self.extra_params = {} - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - 
self.extra_params["extra_body"]["reasoning"] = {"effort": effort} - else: - if not self.extra_params: - self.extra_params = {} - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - self.extra_params["extra_body"]["reasoning_effort"] = effort - - def parse_token_value(self, value): - """ - Parse a token value string into an integer. - Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. - - Args: - value: String or int token value - - Returns: - Integer token value - """ - if isinstance(value, int): - return value - - if not isinstance(value, str): - return int(value) # Try to convert to int - - value = value.strip().upper() - - if value.endswith("K"): - multiplier = 1024 - value = value[:-1] - elif value.endswith("M"): - multiplier = 1024 * 1024 - value = value[:-1] - else: - multiplier = 1 - - # Convert to float first to handle decimal values like "10.5k" - return int(float(value) * multiplier) - - def set_thinking_tokens(self, value): - """ - Set the thinking token budget for models that support it. - Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. - Pass "0" to disable thinking tokens. - """ - if value is not None: - num_tokens = self.parse_token_value(value) - self.use_temperature = False - if not self.extra_params: - self.extra_params = {} - - # OpenRouter models use 'reasoning' instead of 'thinking' - if self.name.startswith("openrouter/"): - if "extra_body" not in self.extra_params: - self.extra_params["extra_body"] = {} - if num_tokens > 0: - self.extra_params["extra_body"]["reasoning"] = {"max_tokens": num_tokens} - else: - if "reasoning" in self.extra_params["extra_body"]: - del self.extra_params["extra_body"]["reasoning"] - else: - if num_tokens > 0: - self.extra_params["thinking"] = {"type": "enabled", "budget_tokens": num_tokens} - else: - if "thinking" in self.extra_params: - del self.extra_params["thinking"] - - def get_raw_thinking_tokens(self): - """Get formatted thinking token budget if available""" - budget = None - - if self.extra_params: - # Check for OpenRouter reasoning format - if self.name.startswith("openrouter/"): - if ( - "extra_body" in self.extra_params - and "reasoning" in self.extra_params["extra_body"] - and "max_tokens" in self.extra_params["extra_body"]["reasoning"] - ): - budget = self.extra_params["extra_body"]["reasoning"]["max_tokens"] - # Check for standard thinking format - elif ( - "thinking" in self.extra_params and "budget_tokens" in self.extra_params["thinking"] - ): - budget = self.extra_params["thinking"]["budget_tokens"] - - return budget - - def get_thinking_tokens(self): - budget = self.get_raw_thinking_tokens() - - if budget is not None: - # Format as xx.yK for thousands, xx.yM for millions - if budget >= 1024 * 1024: - value = budget / (1024 * 1024) - if value == int(value): - return f"{int(value)}M" - else: - return f"{value:.1f}M" - else: - value = budget / 1024 - if value == int(value): - return f"{int(value)}k" - else: - return f"{value:.1f}k" - return None - - def get_reasoning_effort(self): - """Get reasoning effort value if available""" - if self.extra_params: - # Check for OpenRouter reasoning format - if self.name.startswith("openrouter/"): - if ( - "extra_body" in self.extra_params - and "reasoning" in self.extra_params["extra_body"] - and "effort" in self.extra_params["extra_body"]["reasoning"] - ): - return self.extra_params["extra_body"]["reasoning"]["effort"] - # Check for standard reasoning_effort format (e.g. 
in extra_body) - elif ( - "extra_body" in self.extra_params - and "reasoning_effort" in self.extra_params["extra_body"] - ): - return self.extra_params["extra_body"]["reasoning_effort"] - return None - - def is_deepseek(self): - name = self.name.lower() - if "deepseek" not in name: - return - return True - - def is_ollama(self): - return self.name.startswith("ollama/") or self.name.startswith("ollama_chat/") - - async def send_completion( - self, messages, functions, stream, temperature=None, tools=None, max_tokens=None - ): - if os.environ.get("AIDER_SANITY_CHECK_TURNS"): - sanity_check_messages(messages) - - messages = model_request_parser(self, messages) - - if self.verbose: - for message in messages: - msg_role = message.get("role") - msg_content = message.get("content") if message.get("content") else "" - msg_trunc = "" - - if message.get("content"): - msg_trunc = message.get("content")[:30] - - print(f"{msg_role} ({len(msg_content)}): {msg_trunc}") - - kwargs = dict(model=self.name, stream=stream) - - if self.use_temperature is not False: - if temperature is None: - if isinstance(self.use_temperature, bool): - temperature = 0 - else: - temperature = float(self.use_temperature) - - kwargs["temperature"] = temperature - - # `tools` is for modern tool usage. `functions` is for legacy/forced calls. - # This handles `base_coder` sending both with same content for `navigator_coder`. - effective_tools = [] - if tools: - effective_tools.extend(tools) - - if functions: - # Convert legacy `functions` to `tools` format and add them - effective_tools.extend([dict(type="function", function=f) for f in functions]) - - if effective_tools: - # Deduplicate tools based on function name - seen_tool_names = set() - deduped_tools = [] - for tool in effective_tools: - tool_name = tool.get("function", {}).get("name") - if tool_name and tool_name not in seen_tool_names: - deduped_tools.append(tool) - seen_tool_names.add(tool_name) - effective_tools = deduped_tools - kwargs["tools"] = effective_tools - - # Forcing a function call is for legacy style `functions` with a single function. - # This is used by ArchitectCoder and not intended for NavigatorCoder's tools. - if functions and len(functions) == 1: - function = functions[0] - - if "name" in function: - tool_name = function.get("name") - if tool_name: - kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_name}} - - if self.extra_params: - kwargs.update(self.extra_params) - - if max_tokens: - kwargs["max_tokens"] = max_tokens - - if "max_tokens" in kwargs and kwargs["max_tokens"]: - kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") - if self.is_ollama() and "num_ctx" not in kwargs: - num_ctx = int(self.token_count(messages) * 1.25) + 8192 - kwargs["num_ctx"] = num_ctx - - key = json.dumps(kwargs, sort_keys=True).encode() - # dump(kwargs) - - hash_object = hashlib.sha1(key) - if "timeout" not in kwargs: - kwargs["timeout"] = request_timeout - if self.verbose: - dump(kwargs) - kwargs["messages"] = messages - - # Cache System Prompts When Possible - kwargs["cache_control_injection_points"] = [ - { - "location": "message", - "role": "system", - }, - ] - - # Are we using github copilot? 
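The tool handling a little earlier in send_completion (legacy functions converted into tools entries, then the combined list deduplicated by function name) can be exercised on its own; a minimal sketch of the same dedup rule with an illustrative helper name:

    def dedupe_tools(tools):
        seen, out = set(), []
        for tool in tools:
            name = tool.get("function", {}).get("name")
            if name and name not in seen:
                out.append(tool)
                seen.add(name)
        return out

    tools = [
        {"type": "function", "function": {"name": "fetch"}},
        {"type": "function", "function": {"name": "fetch"}},   # duplicate, dropped
        {"type": "function", "function": {"name": "other"}},
    ]
    assert [t["function"]["name"] for t in dedupe_tools(tools)] == ["fetch", "other"]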
- if "GITHUB_COPILOT_TOKEN" in os.environ or self.name.startswith("github_copilot/"): - if "extra_headers" not in kwargs: - kwargs["extra_headers"] = { - "Editor-Version": f"aider/{__version__}", - "Copilot-Integration-Id": "vscode-chat", - } - - try: - res = await litellm.acompletion(**kwargs) - except Exception as err: - print(f"LiteLLM API Error: {str(err)}") - res = self.model_error_response() - - if self.verbose: - print(f"LiteLLM API Error: {str(err)}") - raise - - return hash_object, res - - async def simple_send_with_retries(self, messages, max_tokens=None): - from aider.exceptions import LiteLLMExceptions - - litellm_ex = LiteLLMExceptions() - messages = model_request_parser(self, messages) - retry_delay = 0.125 - - if self.verbose: - dump(messages) - - while True: - try: - _hash, response = await self.send_completion( - messages=messages, - functions=None, - stream=False, - max_tokens=max_tokens, - ) - if not response or not hasattr(response, "choices") or not response.choices: - return None - res = response.choices[0].message.content - from aider.reasoning_tags import remove_reasoning_content - - return remove_reasoning_content(res, self.reasoning_tag) - - except litellm_ex.exceptions_tuple() as err: - ex_info = litellm_ex.get_ex_info(err) - print(str(err)) - if ex_info.description: - print(ex_info.description) - should_retry = ex_info.retry - if should_retry: - retry_delay *= 2 - if retry_delay > RETRY_TIMEOUT: - should_retry = False - if not should_retry: - return None - print(f"Retrying in {retry_delay:.1f} seconds...") - time.sleep(retry_delay) - continue - except AttributeError: - return None - - async def model_error_response(self): - for i in range(1): - await asyncio.sleep(0.1) - yield litellm.ModelResponse( - choices=[ - litellm.Choices( - finish_reason="stop", - index=0, - message=litellm.Message( - content="Model API Response Error. Please retry the previous request" - ), # Provide an empty message object - ) - ], - model=self.name, - ) - - -def register_models(model_settings_fnames): - files_loaded = [] - for model_settings_fname in model_settings_fnames: - if not os.path.exists(model_settings_fname): - continue - - if not Path(model_settings_fname).read_text().strip(): - continue - - try: - with open(model_settings_fname, "r") as model_settings_file: - model_settings_list = yaml.safe_load(model_settings_file) - - for model_settings_dict in model_settings_list: - model_settings = ModelSettings(**model_settings_dict) - - # Remove all existing settings for this model name - MODEL_SETTINGS[:] = [ms for ms in MODEL_SETTINGS if ms.name != model_settings.name] - # Add the new settings - MODEL_SETTINGS.append(model_settings) - except Exception as e: - raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") - files_loaded.append(model_settings_fname) - - return files_loaded - - -def register_litellm_models(model_fnames): - files_loaded = [] - for model_fname in model_fnames: - if not os.path.exists(model_fname): - continue - - try: - data = Path(model_fname).read_text() - if not data.strip(): - continue - model_def = json5.loads(data) - if not model_def: - continue - - # Defer registration with litellm to faster path. 
- model_info_manager.local_model_metadata.update(model_def) - except Exception as e: - raise Exception(f"Error loading model definition from {model_fname}: {e}") - - files_loaded.append(model_fname) - - return files_loaded - - -def validate_variables(vars): - missing = [] - for var in vars: - if var not in os.environ: - missing.append(var) - if missing: - return dict(keys_in_environment=False, missing_keys=missing) - return dict(keys_in_environment=True, missing_keys=missing) - - -async def sanity_check_models(io, main_model): - problem_main = await sanity_check_model(io, main_model) - - problem_weak = None - if main_model.weak_model and main_model.weak_model is not main_model: - problem_weak = await sanity_check_model(io, main_model.weak_model) - - problem_editor = None - if ( - main_model.editor_model - and main_model.editor_model is not main_model - and main_model.editor_model is not main_model.weak_model - ): - problem_editor = await sanity_check_model(io, main_model.editor_model) - - return problem_main or problem_weak or problem_editor - - -async def sanity_check_model(io, model): - show = False - - if model.missing_keys: - show = True - io.tool_warning(f"Warning: {model} expects these environment variables") - for key in model.missing_keys: - value = os.environ.get(key, "") - status = "Set" if value else "Not set" - io.tool_output(f"- {key}: {status}") - - if platform.system() == "Windows": - io.tool_output( - "Note: You may need to restart your terminal or command prompt for `setx` to take" - " effect." - ) - - elif not model.keys_in_environment: - show = True - io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") - - # Check for model-specific dependencies - await check_for_dependencies(io, model.name) - - if not model.info: - show = True - io.tool_warning( - f"Warning for {model}: Unknown context window size and costs, using sane defaults." - ) - - possible_matches = fuzzy_match_models(model.name) - if possible_matches: - io.tool_output("Did you mean one of these?") - for match in possible_matches: - io.tool_output(f"- {match}") - - return show - - -async def check_for_dependencies(io, model_name): - """ - Check for model-specific dependencies and install them if needed. 
- - Args: - io: The IO object for user interaction - model_name: The name of the model to check dependencies for - """ - # Check if this is a Bedrock model and ensure boto3 is installed - if model_name.startswith("bedrock/"): - await check_pip_install_extra( - io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] - ) - - # Check if this is a Vertex AI model and ensure google-cloud-aiplatform is installed - elif model_name.startswith("vertex_ai/"): - await check_pip_install_extra( - io, - "google.cloud.aiplatform", - "Google Vertex AI models require the google-cloud-aiplatform package.", - ["google-cloud-aiplatform"], - ) - - -def fuzzy_match_models(name): - name = name.lower() - - chat_models = set() - model_metadata = list(litellm.model_cost.items()) - model_metadata += list(model_info_manager.local_model_metadata.items()) - - for orig_model, attrs in model_metadata: - model = orig_model.lower() - if attrs.get("mode") != "chat": - continue - provider = attrs.get("litellm_provider", "").lower() - if not provider: - continue - provider += "/" - - if model.startswith(provider): - fq_model = orig_model - else: - fq_model = provider + orig_model - - chat_models.add(fq_model) - chat_models.add(orig_model) - - chat_models = sorted(chat_models) - # exactly matching model - # matching_models = [ - # (fq,m) for fq,m in chat_models - # if name == fq or name == m - # ] - # if matching_models: - # return matching_models - - # Check for model names containing the name - matching_models = [m for m in chat_models if name in m] - if matching_models: - return sorted(set(matching_models)) - - # Check for slight misspellings - models = set(chat_models) - matching_models = difflib.get_close_matches(name, models, n=3, cutoff=0.8) - - return sorted(set(matching_models)) - - -def print_matching_models(io, search): - matches = fuzzy_match_models(search) - if matches: - io.tool_output(f'Models which match "{search}":') - for model in matches: - io.tool_output(f"- {model}") - else: - io.tool_output(f'No models match "{search}".') - - -def get_model_settings_as_yaml(): - from dataclasses import fields - - import yaml - - model_settings_list = [] - # Add default settings first with all field values - defaults = {} - for field in fields(ModelSettings): - defaults[field.name] = field.default - defaults["name"] = "(default values)" - model_settings_list.append(defaults) - - # Sort model settings by name - for ms in sorted(MODEL_SETTINGS, key=lambda x: x.name): - # Create dict with explicit field order - model_settings_dict = {} - for field in fields(ModelSettings): - value = getattr(ms, field.name) - if value != field.default: - model_settings_dict[field.name] = value - model_settings_list.append(model_settings_dict) - # Add blank line between entries - model_settings_list.append(None) - - # Filter out None values before dumping - yaml_str = yaml.dump( - [ms for ms in model_settings_list if ms is not None], - default_flow_style=False, - sort_keys=False, # Preserve field order from dataclass - ) - # Add actual blank lines between entries - return yaml_str.replace("\n- ", "\n\n- ") - - -def main(): - if len(sys.argv) < 2: - print("Usage: python models.py or python models.py --yaml") - sys.exit(1) - - if sys.argv[1] == "--yaml": - yaml_string = get_model_settings_as_yaml() - print(yaml_string) - else: - model_name = sys.argv[1] - matching_models = fuzzy_match_models(model_name) - - if matching_models: - print(f"Matching models for '{model_name}':") - for model in matching_models: - print(model) - else: - 
print(f"No matching models found for '{model_name}'.") - - -if __name__ == "__main__": - main() +import asyncio +import difflib +import hashlib +import importlib.resources +import json +import math +import os +import platform +import sys +import time +from dataclasses import dataclass, fields +from pathlib import Path +from typing import Optional, Union + +import json5 +import yaml +from PIL import Image + +from aider import __version__ +from aider.dump import dump # noqa: F401 +from aider.helpers.requests import model_request_parser +from aider.llm import litellm +from aider.openrouter import OpenRouterModelManager +from aider.sendchat import sanity_check_messages +from aider.utils import check_pip_install_extra + +RETRY_TIMEOUT = 60 + +request_timeout = 600 + +DEFAULT_MODEL_NAME = "gpt-4o" +ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25" + +OPENAI_MODELS = """ +o1 +o1-preview +o1-mini +o3-mini +gpt-4 +gpt-4o +gpt-4o-2024-05-13 +gpt-4-turbo-preview +gpt-4-0314 +gpt-4-0613 +gpt-4-32k +gpt-4-32k-0314 +gpt-4-32k-0613 +gpt-4-turbo +gpt-4-turbo-2024-04-09 +gpt-4-1106-preview +gpt-4-0125-preview +gpt-4-vision-preview +gpt-4-1106-vision-preview +gpt-4o-mini +gpt-4o-mini-2024-07-18 +gpt-3.5-turbo +gpt-3.5-turbo-0301 +gpt-3.5-turbo-0613 +gpt-3.5-turbo-1106 +gpt-3.5-turbo-0125 +gpt-3.5-turbo-16k +gpt-3.5-turbo-16k-0613 +""" + +OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()] + +ANTHROPIC_MODELS = """ +claude-2 +claude-2.1 +claude-3-haiku-20240307 +claude-3-5-haiku-20241022 +claude-3-opus-20240229 +claude-3-sonnet-20240229 +claude-3-5-sonnet-20240620 +claude-3-5-sonnet-20241022 +claude-sonnet-4-20250514 +claude-opus-4-20250514 +""" + +ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()] + +# Mapping of model aliases to their canonical names +MODEL_ALIASES = { + # Claude models + "sonnet": "anthropic/claude-sonnet-4-20250514", + "haiku": "claude-3-5-haiku-20241022", + "opus": "claude-opus-4-20250514", + # GPT models + "4": "gpt-4-0613", + "4o": "gpt-4o", + "4-turbo": "gpt-4-1106-preview", + "35turbo": "gpt-3.5-turbo", + "35-turbo": "gpt-3.5-turbo", + "3": "gpt-3.5-turbo", + # Other models + "deepseek": "deepseek/deepseek-chat", + "flash": "gemini/gemini-2.5-flash", + "flash-lite": "gemini/gemini-2.5-flash-lite", + "quasar": "openrouter/openrouter/quasar-alpha", + "r1": "deepseek/deepseek-reasoner", + "gemini-2.5-pro": "gemini/gemini-2.5-pro", + "gemini-3-pro-preview": "gemini/gemini-3-pro-preview", + "gemini": "gemini/gemini-3-pro-preview", + "gemini-exp": "gemini/gemini-2.5-pro-exp-03-25", + "grok3": "xai/grok-3-beta", + "optimus": "openrouter/openrouter/optimus-alpha", +} +# Model metadata loaded from resources and user's files. 
+ + +@dataclass +class ModelSettings: + # Model class needs to have each of these as well + name: str + edit_format: str = "whole" + weak_model_name: Optional[str] = None + use_repo_map: bool = False + send_undo_reply: bool = False + lazy: bool = False + overeager: bool = False + reminder: str = "user" + examples_as_sys_msg: bool = False + extra_params: Optional[dict] = None + cache_control: bool = False + caches_by_default: bool = False + use_system_prompt: bool = True + use_temperature: Union[bool, float] = True + streaming: bool = True + editor_model_name: Optional[str] = None + editor_edit_format: Optional[str] = None + reasoning_tag: Optional[str] = None + remove_reasoning: Optional[str] = None # Deprecated alias for reasoning_tag + system_prompt_prefix: Optional[str] = None + accepts_settings: Optional[list] = None + + +# Load model settings from package resource +MODEL_SETTINGS = [] +with importlib.resources.open_text("aider.resources", "model-settings.yml") as f: + model_settings_list = yaml.safe_load(f) + for model_settings_dict in model_settings_list: + MODEL_SETTINGS.append(ModelSettings(**model_settings_dict)) + + +class ModelInfoManager: + MODEL_INFO_URL = ( + "https://raw.githubusercontent.com/BerriAI/litellm/main/" + "model_prices_and_context_window.json" + ) + CACHE_TTL = 60 * 60 * 24 # 24 hours + + def __init__(self): + self.cache_dir = Path.home() / ".aider" / "caches" + self.cache_file = self.cache_dir / "model_prices_and_context_window.json" + self.content = None + self.local_model_metadata = {} + self.verify_ssl = True + self._cache_loaded = False + + # Manager for the cached OpenRouter model database + self.openrouter_manager = OpenRouterModelManager() + + def set_verify_ssl(self, verify_ssl): + self.verify_ssl = verify_ssl + if hasattr(self, "openrouter_manager"): + self.openrouter_manager.set_verify_ssl(verify_ssl) + + def _load_cache(self): + if self._cache_loaded: + return + + try: + self.cache_dir.mkdir(parents=True, exist_ok=True) + if self.cache_file.exists(): + cache_age = time.time() - self.cache_file.stat().st_mtime + if cache_age < self.CACHE_TTL: + try: + self.content = json.loads(self.cache_file.read_text()) + except json.JSONDecodeError: + # If the cache file is corrupted, treat it as missing + self.content = None + except OSError: + pass + + self._cache_loaded = True + + def _update_cache(self): + try: + import requests + + # Respect the --no-verify-ssl switch + response = requests.get(self.MODEL_INFO_URL, timeout=5, verify=self.verify_ssl) + if response.status_code == 200: + self.content = response.json() + try: + self.cache_file.write_text(json.dumps(self.content, indent=4)) + except OSError: + pass + except Exception as ex: + print(str(ex)) + try: + # Save empty dict to cache file on failure + self.cache_file.write_text("{}") + except OSError: + pass + + def get_model_from_cached_json_db(self, model): + data = self.local_model_metadata.get(model) + if data: + return data + + # Ensure cache is loaded before checking content + self._load_cache() + + if not self.content: + self._update_cache() + + if not self.content: + return dict() + + info = self.content.get(model, dict()) + if info: + return info + + pieces = model.split("/") + if len(pieces) == 2: + info = self.content.get(pieces[1]) + if info and info.get("litellm_provider") == pieces[0]: + return info + + return dict() + + def get_model_info(self, model): + cached_info = self.get_model_from_cached_json_db(model) + + litellm_info = None + if litellm._lazy_module or not cached_info: + try: + 
litellm_info = litellm.get_model_info(model) + except Exception as ex: + if "model_prices_and_context_window.json" not in str(ex): + print(str(ex)) + + if litellm_info: + return litellm_info + + if not cached_info and model.startswith("openrouter/"): + # First try using the locally cached OpenRouter model database + openrouter_info = self.openrouter_manager.get_model_info(model) + if openrouter_info: + return openrouter_info + + # Fallback to legacy web-scraping if the API cache does not contain the model + openrouter_info = self.fetch_openrouter_model_info(model) + if openrouter_info: + return openrouter_info + + return cached_info + + def fetch_openrouter_model_info(self, model): + """ + Fetch model info by scraping the openrouter model page. + Expected URL: https://openrouter.ai/ + Example: openrouter/qwen/qwen-2.5-72b-instruct:free + Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, + input_cost_per_token, output_cost_per_token. + """ + url_part = model[len("openrouter/") :] + url = "https://openrouter.ai/" + url_part + try: + import requests + + response = requests.get(url, timeout=5, verify=self.verify_ssl) + if response.status_code != 200: + return {} + html = response.text + import re + + if re.search( + rf"The model\s*.*{re.escape(url_part)}.* is not available", html, re.IGNORECASE + ): + print(f"\033[91mError: Model '{url_part}' is not available\033[0m") + return {} + text = re.sub(r"<[^>]+>", " ", html) + context_match = re.search(r"([\d,]+)\s*context", text) + if context_match: + context_str = context_match.group(1).replace(",", "") + context_size = int(context_str) + else: + context_size = None + input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE) + output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE) + input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None + output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None + if context_size is None or input_cost is None or output_cost is None: + return {} + params = { + "max_input_tokens": context_size, + "max_tokens": context_size, + "max_output_tokens": context_size, + "input_cost_per_token": input_cost, + "output_cost_per_token": output_cost, + } + return params + except Exception as e: + print("Error fetching openrouter info:", str(e)) + return {} + + +model_info_manager = ModelInfoManager() + + +class Model(ModelSettings): + def __init__( + self, model, weak_model=None, editor_model=None, editor_edit_format=None, verbose=False + ): + # Map any alias to its canonical name + model = MODEL_ALIASES.get(model, model) + + self.name = model + self.verbose = verbose + + self.max_chat_history_tokens = 1024 + self.weak_model = None + self.editor_model = None + + # Find the extra settings + self.extra_model_settings = next( + (ms for ms in MODEL_SETTINGS if ms.name == "aider/extra_params"), None + ) + + self.info = self.get_model_info(model) + + # Are all needed keys/params available? 
+ res = self.validate_environment() + self.missing_keys = res.get("missing_keys") + self.keys_in_environment = res.get("keys_in_environment") + + max_input_tokens = self.info.get("max_input_tokens") or 0 + # Calculate max_chat_history_tokens as 1/16th of max_input_tokens, + # with minimum 1k and maximum 8k + self.max_chat_history_tokens = min(max(max_input_tokens / 16, 1024), 8192) + + self.configure_model_settings(model) + if weak_model is False: + self.weak_model_name = None + else: + self.get_weak_model(weak_model) + + if editor_model is False: + self.editor_model_name = None + else: + self.get_editor_model(editor_model, editor_edit_format) + + def get_model_info(self, model): + return model_info_manager.get_model_info(model) + + def _copy_fields(self, source): + """Helper to copy fields from a ModelSettings instance to self""" + for field in fields(ModelSettings): + val = getattr(source, field.name) + setattr(self, field.name, val) + + # Handle backward compatibility: if remove_reasoning is set but reasoning_tag isn't, + # use remove_reasoning's value for reasoning_tag + if self.reasoning_tag is None and self.remove_reasoning is not None: + self.reasoning_tag = self.remove_reasoning + + def configure_model_settings(self, model): + # Look for exact model match + exact_match = False + for ms in MODEL_SETTINGS: + # direct match, or match "provider/" + if model == ms.name: + self._copy_fields(ms) + exact_match = True + break # Continue to apply overrides + + # Initialize accepts_settings if it's None + if self.accepts_settings is None: + self.accepts_settings = [] + + model = model.lower() + + # If no exact match, try generic settings + if not exact_match: + self.apply_generic_model_settings(model) + + # Apply override settings last if they exist + if ( + self.extra_model_settings + and self.extra_model_settings.extra_params + and self.extra_model_settings.name == "aider/extra_params" + ): + # Initialize extra_params if it doesn't exist + if not self.extra_params: + self.extra_params = {} + + # Deep merge the extra_params dicts + for key, value in self.extra_model_settings.extra_params.items(): + if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): + # For nested dicts, merge recursively + self.extra_params[key] = {**self.extra_params[key], **value} + else: + # For non-dict values, simply update + self.extra_params[key] = value + + # Ensure OpenRouter models accept thinking_tokens and reasoning_effort + if self.name.startswith("openrouter/"): + if self.accepts_settings is None: + self.accepts_settings = [] + if "thinking_tokens" not in self.accepts_settings: + self.accepts_settings.append("thinking_tokens") + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + + def apply_generic_model_settings(self, model): + if "/o3-mini" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.system_prompt_prefix = "Formatting re-enabled. " + self.system_prompt_prefix = "Formatting re-enabled. 
" + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "gpt-4.1-mini" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = False + return # <-- + + if "gpt-4.1" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = False + return # <-- + + last_segment = model.split("/")[-1] + if last_segment in ("gpt-5", "gpt-5-2025-08-07") or "gpt-5.1" in model: + self.use_temperature = False + self.edit_format = "diff" + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "/o1-mini" in model: + self.use_repo_map = True + self.use_temperature = False + self.use_system_prompt = False + return # <-- + + if "/o1-preview" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.use_system_prompt = False + return # <-- + + if "/o1" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.use_temperature = False + self.streaming = False + self.system_prompt_prefix = "Formatting re-enabled. " + if "reasoning_effort" not in self.accepts_settings: + self.accepts_settings.append("reasoning_effort") + return # <-- + + if "deepseek" in model and "v3" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.reminder = "sys" + self.examples_as_sys_msg = True + return # <-- + + if "deepseek" in model and ("r1" in model or "reasoning" in model): + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.use_temperature = False + self.reasoning_tag = "think" + return # <-- + + if ("llama3" in model or "llama-3" in model) and "70b" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.send_undo_reply = True + self.examples_as_sys_msg = True + return # <-- + + if "gpt-4-turbo" in model or ("gpt-4-" in model and "-preview" in model): + self.edit_format = "udiff" + self.use_repo_map = True + self.send_undo_reply = True + return # <-- + + if "gpt-4" in model or "claude-3-opus" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.send_undo_reply = True + return # <-- + + if "gpt-3.5" in model or "gpt-4" in model: + self.reminder = "sys" + return # <-- + + if "3-7-sonnet" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.reminder = "user" + if "thinking_tokens" not in self.accepts_settings: + self.accepts_settings.append("thinking_tokens") + return # <-- + + if "3.5-sonnet" in model or "3-5-sonnet" in model: + self.edit_format = "diff" + self.use_repo_map = True + self.examples_as_sys_msg = True + self.reminder = "user" + return # <-- + + if model.startswith("o1-") or "/o1-" in model: + self.use_system_prompt = False + self.use_temperature = False + return # <-- + + if ( + "qwen" in model + and "coder" in model + and ("2.5" in model or "2-5" in model) + and "32b" in model + ): + self.edit_format = "diff" + self.editor_edit_format = "editor-diff" + self.use_repo_map = True + return # <-- + + if "qwq" in model and "32b" in model and "preview" not in model: + self.edit_format = "diff" + self.editor_edit_format = "editor-diff" + self.use_repo_map = True + self.reasoning_tag = "think" + self.examples_as_sys_msg = True + self.use_temperature = 0.6 + self.extra_params = dict(top_p=0.95) + return # <-- + + if "qwen3" in model: + self.edit_format = "diff" + 
self.use_repo_map = True + if "235b" in model: + self.system_prompt_prefix = "/no_think" + self.use_temperature = 0.7 + self.extra_params = {"top_p": 0.8, "top_k": 20, "min_p": 0.0} + else: + self.examples_as_sys_msg = True + self.use_temperature = 0.6 + self.reasoning_tag = "think" + self.extra_params = {"top_p": 0.95, "top_k": 20, "min_p": 0.0} + return # <-- + + # use the defaults + if self.edit_format == "diff": + self.use_repo_map = True + return # <-- + + def __str__(self): + return self.name + + def get_weak_model(self, provided_weak_model_name): + # If weak_model_name is provided, override the model settings + if provided_weak_model_name: + self.weak_model_name = provided_weak_model_name + + if not self.weak_model_name: + self.weak_model = self + return + + if self.weak_model_name == self.name: + self.weak_model = self + return + + self.weak_model = Model( + self.weak_model_name, + weak_model=False, + ) + return self.weak_model + + def commit_message_models(self): + return [self.weak_model, self] + + def get_editor_model(self, provided_editor_model_name, editor_edit_format): + # If editor_model_name is provided, override the model settings + if provided_editor_model_name: + self.editor_model_name = provided_editor_model_name + if editor_edit_format: + self.editor_edit_format = editor_edit_format + + if not self.editor_model_name or self.editor_model_name == self.name: + self.editor_model = self + else: + self.editor_model = Model( + self.editor_model_name, + editor_model=False, + ) + + if not self.editor_edit_format: + self.editor_edit_format = self.editor_model.edit_format + if self.editor_edit_format in ("diff", "whole", "diff-fenced"): + self.editor_edit_format = "editor-" + self.editor_edit_format + + return self.editor_model + + def tokenizer(self, text): + return litellm.encode(model=self.name, text=text) + + def token_count(self, messages): + if isinstance(messages, dict): + messages = [messages] + + if isinstance(messages, list): + try: + return litellm.token_counter(model=self.name, messages=messages) + except Exception: + pass # fall back to raw tokenizer + + if not self.tokenizer: + return 0 + + if isinstance(messages, str): + msgs = messages + else: + msgs = json.dumps(messages) + + try: + return len(self.tokenizer(msgs)) + except Exception as err: + print(f"Unable to count tokens with tokenizer: {err}") + return 0 + + def token_count_for_image(self, fname): + """ + Calculate the token cost for an image assuming high detail. + The token cost is determined by the size of the image. + :param fname: The filename of the image. + :return: The token cost for the image. 
+ """ + width, height = self.get_image_size(fname) + + # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 + max_dimension = max(width, height) + if max_dimension > 2048: + scale_factor = 2048 / max_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Scale the image such that the shortest side is 768 pixels long + min_dimension = min(width, height) + scale_factor = 768 / min_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Calculate the number of 512x512 tiles needed to cover the image + tiles_width = math.ceil(width / 512) + tiles_height = math.ceil(height / 512) + num_tiles = tiles_width * tiles_height + + # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens + token_cost = num_tiles * 170 + 85 + return token_cost + + def get_image_size(self, fname): + """ + Retrieve the size of an image. + :param fname: The filename of the image. + :return: A tuple (width, height) representing the image size in pixels. + """ + with Image.open(fname) as img: + return img.size + + def fast_validate_environment(self): + """Fast path for common models. Avoids forcing litellm import.""" + + model = self.name + + pieces = model.split("/") + if len(pieces) > 1: + provider = pieces[0] + else: + provider = None + + keymap = dict( + openrouter="OPENROUTER_API_KEY", + openai="OPENAI_API_KEY", + deepseek="DEEPSEEK_API_KEY", + gemini="GEMINI_API_KEY", + anthropic="ANTHROPIC_API_KEY", + groq="GROQ_API_KEY", + fireworks_ai="FIREWORKS_API_KEY", + ) + var = None + if model in OPENAI_MODELS: + var = "OPENAI_API_KEY" + elif model in ANTHROPIC_MODELS: + var = "ANTHROPIC_API_KEY" + else: + var = keymap.get(provider) + + if var and os.environ.get(var): + return dict(keys_in_environment=[var], missing_keys=[]) + + def validate_environment(self): + res = self.fast_validate_environment() + if res: + return res + + # https://github.com/BerriAI/litellm/issues/3190 + + model = self.name + res = litellm.validate_environment(model) + + # If missing AWS credential keys but AWS_PROFILE is set, consider AWS credentials valid + if res["missing_keys"] and any( + key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] for key in res["missing_keys"] + ): + if model.startswith("bedrock/") or model.startswith("us.anthropic."): + if os.environ.get("AWS_PROFILE"): + res["missing_keys"] = [ + k + for k in res["missing_keys"] + if k not in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] + ] + if not res["missing_keys"]: + res["keys_in_environment"] = True + + if res["keys_in_environment"]: + return res + if res["missing_keys"]: + return res + + provider = self.info.get("litellm_provider", "").lower() + if provider == "cohere_chat": + return validate_variables(["COHERE_API_KEY"]) + if provider == "gemini": + return validate_variables(["GEMINI_API_KEY"]) + if provider == "groq": + return validate_variables(["GROQ_API_KEY"]) + + return res + + def get_repo_map_tokens(self): + map_tokens = 1024 + max_inp_tokens = self.info.get("max_input_tokens") + if max_inp_tokens: + map_tokens = max_inp_tokens / 8 + map_tokens = min(map_tokens, 4096) + map_tokens = max(map_tokens, 1024) + return map_tokens + + def set_reasoning_effort(self, effort): + """Set the reasoning effort parameter for models that support it""" + if effort is not None: + if self.name.startswith("openrouter/"): + if not self.extra_params: + self.extra_params = {} + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + 
self.extra_params["extra_body"]["reasoning"] = {"effort": effort} + else: + if not self.extra_params: + self.extra_params = {} + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + self.extra_params["extra_body"]["reasoning_effort"] = effort + + def parse_token_value(self, value): + """ + Parse a token value string into an integer. + Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. + + Args: + value: String or int token value + + Returns: + Integer token value + """ + if isinstance(value, int): + return value + + if not isinstance(value, str): + return int(value) # Try to convert to int + + value = value.strip().upper() + + if value.endswith("K"): + multiplier = 1024 + value = value[:-1] + elif value.endswith("M"): + multiplier = 1024 * 1024 + value = value[:-1] + else: + multiplier = 1 + + # Convert to float first to handle decimal values like "10.5k" + return int(float(value) * multiplier) + + def set_thinking_tokens(self, value): + """ + Set the thinking token budget for models that support it. + Accepts formats: 8096, "8k", "10.5k", "0.5M", "10K", etc. + Pass "0" to disable thinking tokens. + """ + if value is not None: + num_tokens = self.parse_token_value(value) + self.use_temperature = False + if not self.extra_params: + self.extra_params = {} + + # OpenRouter models use 'reasoning' instead of 'thinking' + if self.name.startswith("openrouter/"): + if "extra_body" not in self.extra_params: + self.extra_params["extra_body"] = {} + if num_tokens > 0: + self.extra_params["extra_body"]["reasoning"] = {"max_tokens": num_tokens} + else: + if "reasoning" in self.extra_params["extra_body"]: + del self.extra_params["extra_body"]["reasoning"] + else: + if num_tokens > 0: + self.extra_params["thinking"] = {"type": "enabled", "budget_tokens": num_tokens} + else: + if "thinking" in self.extra_params: + del self.extra_params["thinking"] + + def get_raw_thinking_tokens(self): + """Get formatted thinking token budget if available""" + budget = None + + if self.extra_params: + # Check for OpenRouter reasoning format + if self.name.startswith("openrouter/"): + if ( + "extra_body" in self.extra_params + and "reasoning" in self.extra_params["extra_body"] + and "max_tokens" in self.extra_params["extra_body"]["reasoning"] + ): + budget = self.extra_params["extra_body"]["reasoning"]["max_tokens"] + # Check for standard thinking format + elif ( + "thinking" in self.extra_params and "budget_tokens" in self.extra_params["thinking"] + ): + budget = self.extra_params["thinking"]["budget_tokens"] + + return budget + + def get_thinking_tokens(self): + budget = self.get_raw_thinking_tokens() + + if budget is not None: + # Format as xx.yK for thousands, xx.yM for millions + if budget >= 1024 * 1024: + value = budget / (1024 * 1024) + if value == int(value): + return f"{int(value)}M" + else: + return f"{value:.1f}M" + else: + value = budget / 1024 + if value == int(value): + return f"{int(value)}k" + else: + return f"{value:.1f}k" + return None + + def get_reasoning_effort(self): + """Get reasoning effort value if available""" + if self.extra_params: + # Check for OpenRouter reasoning format + if self.name.startswith("openrouter/"): + if ( + "extra_body" in self.extra_params + and "reasoning" in self.extra_params["extra_body"] + and "effort" in self.extra_params["extra_body"]["reasoning"] + ): + return self.extra_params["extra_body"]["reasoning"]["effort"] + # Check for standard reasoning_effort format (e.g. 
in extra_body) + elif ( + "extra_body" in self.extra_params + and "reasoning_effort" in self.extra_params["extra_body"] + ): + return self.extra_params["extra_body"]["reasoning_effort"] + return None + + def is_deepseek(self): + name = self.name.lower() + if "deepseek" not in name: + return + return True + + def is_ollama(self): + return self.name.startswith("ollama/") or self.name.startswith("ollama_chat/") + + async def send_completion( + self, messages, functions, stream, temperature=None, tools=None, max_tokens=None + ): + if os.environ.get("AIDER_SANITY_CHECK_TURNS"): + sanity_check_messages(messages) + + messages = model_request_parser(self, messages) + + if self.verbose: + for message in messages: + msg_role = message.get("role") + msg_content = message.get("content") if message.get("content") else "" + msg_trunc = "" + + if message.get("content"): + msg_trunc = message.get("content")[:30] + + print(f"{msg_role} ({len(msg_content)}): {msg_trunc}") + + kwargs = dict(model=self.name, stream=stream) + + if self.use_temperature is not False: + if temperature is None: + if isinstance(self.use_temperature, bool): + temperature = 0 + else: + temperature = float(self.use_temperature) + + kwargs["temperature"] = temperature + + # `tools` is for modern tool usage. `functions` is for legacy/forced calls. + # This handles `base_coder` sending both with same content for `navigator_coder`. + effective_tools = [] + if tools: + effective_tools.extend(tools) + + if functions: + # Convert legacy `functions` to `tools` format and add them + effective_tools.extend([dict(type="function", function=f) for f in functions]) + + if effective_tools: + # Deduplicate tools based on function name + seen_tool_names = set() + deduped_tools = [] + for tool in effective_tools: + tool_name = tool.get("function", {}).get("name") + if tool_name and tool_name not in seen_tool_names: + deduped_tools.append(tool) + seen_tool_names.add(tool_name) + effective_tools = deduped_tools + kwargs["tools"] = effective_tools + + # Forcing a function call is for legacy style `functions` with a single function. + # This is used by ArchitectCoder and not intended for NavigatorCoder's tools. + if functions and len(functions) == 1: + function = functions[0] + + if "name" in function: + tool_name = function.get("name") + if tool_name: + kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_name}} + + if self.extra_params: + kwargs.update(self.extra_params) + + if max_tokens: + kwargs["max_tokens"] = max_tokens + + if "max_tokens" in kwargs and kwargs["max_tokens"]: + kwargs["max_completion_tokens"] = kwargs.pop("max_tokens") + if self.is_ollama() and "num_ctx" not in kwargs: + num_ctx = int(self.token_count(messages) * 1.25) + 8192 + kwargs["num_ctx"] = num_ctx + + key = json.dumps(kwargs, sort_keys=True).encode() + # dump(kwargs) + + hash_object = hashlib.sha1(key) + if "timeout" not in kwargs: + kwargs["timeout"] = request_timeout + if self.verbose: + dump(kwargs) + kwargs["messages"] = messages + + # Cache System Prompts When Possible + kwargs["cache_control_injection_points"] = [ + { + "location": "message", + "role": "system", + }, + ] + + # Are we using github copilot? 
+ if "GITHUB_COPILOT_TOKEN" in os.environ or self.name.startswith("github_copilot/"): + if "extra_headers" not in kwargs: + kwargs["extra_headers"] = { + "Editor-Version": f"aider/{__version__}", + "Copilot-Integration-Id": "vscode-chat", + } + + try: + res = await litellm.acompletion(**kwargs) + except Exception as err: + print(f"LiteLLM API Error: {str(err)}") + res = self.model_error_response() + + if self.verbose: + print(f"LiteLLM API Error: {str(err)}") + raise + + return hash_object, res + + async def simple_send_with_retries(self, messages, max_tokens=None): + from aider.exceptions import LiteLLMExceptions + + litellm_ex = LiteLLMExceptions() + messages = model_request_parser(self, messages) + retry_delay = 0.125 + + if self.verbose: + dump(messages) + + while True: + try: + _hash, response = await self.send_completion( + messages=messages, + functions=None, + stream=False, + max_tokens=max_tokens, + ) + if not response or not hasattr(response, "choices") or not response.choices: + return None + res = response.choices[0].message.content + from aider.reasoning_tags import remove_reasoning_content + + return remove_reasoning_content(res, self.reasoning_tag) + + except litellm_ex.exceptions_tuple() as err: + ex_info = litellm_ex.get_ex_info(err) + print(str(err)) + if ex_info.description: + print(ex_info.description) + should_retry = ex_info.retry + if should_retry: + retry_delay *= 2 + if retry_delay > RETRY_TIMEOUT: + should_retry = False + if not should_retry: + return None + print(f"Retrying in {retry_delay:.1f} seconds...") + time.sleep(retry_delay) + continue + except AttributeError: + return None + + async def model_error_response(self): + for i in range(1): + await asyncio.sleep(0.1) + yield litellm.ModelResponse( + choices=[ + litellm.Choices( + finish_reason="stop", + index=0, + message=litellm.Message( + content="Model API Response Error. Please retry the previous request" + ), # Provide an empty message object + ) + ], + model=self.name, + ) + + +def register_models(model_settings_fnames): + files_loaded = [] + for model_settings_fname in model_settings_fnames: + if not os.path.exists(model_settings_fname): + continue + + if not Path(model_settings_fname).read_text().strip(): + continue + + try: + with open(model_settings_fname, "r") as model_settings_file: + model_settings_list = yaml.safe_load(model_settings_file) + + for model_settings_dict in model_settings_list: + model_settings = ModelSettings(**model_settings_dict) + + # Remove all existing settings for this model name + MODEL_SETTINGS[:] = [ms for ms in MODEL_SETTINGS if ms.name != model_settings.name] + # Add the new settings + MODEL_SETTINGS.append(model_settings) + except Exception as e: + raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") + files_loaded.append(model_settings_fname) + + return files_loaded + + +def register_litellm_models(model_fnames): + files_loaded = [] + for model_fname in model_fnames: + if not os.path.exists(model_fname): + continue + + try: + data = Path(model_fname).read_text() + if not data.strip(): + continue + model_def = json5.loads(data) + if not model_def: + continue + + # Defer registration with litellm to faster path. 
+ model_info_manager.local_model_metadata.update(model_def) + except Exception as e: + raise Exception(f"Error loading model definition from {model_fname}: {e}") + + files_loaded.append(model_fname) + + return files_loaded + + +def validate_variables(vars): + missing = [] + for var in vars: + if var not in os.environ: + missing.append(var) + if missing: + return dict(keys_in_environment=False, missing_keys=missing) + return dict(keys_in_environment=True, missing_keys=missing) + + +async def sanity_check_models(io, main_model): + problem_main = await sanity_check_model(io, main_model) + + problem_weak = None + if main_model.weak_model and main_model.weak_model is not main_model: + problem_weak = await sanity_check_model(io, main_model.weak_model) + + problem_editor = None + if ( + main_model.editor_model + and main_model.editor_model is not main_model + and main_model.editor_model is not main_model.weak_model + ): + problem_editor = await sanity_check_model(io, main_model.editor_model) + + return problem_main or problem_weak or problem_editor + + +async def sanity_check_model(io, model): + show = False + + if model.missing_keys: + show = True + io.tool_warning(f"Warning: {model} expects these environment variables") + for key in model.missing_keys: + value = os.environ.get(key, "") + status = "Set" if value else "Not set" + io.tool_output(f"- {key}: {status}") + + if platform.system() == "Windows": + io.tool_output( + "Note: You may need to restart your terminal or command prompt for `setx` to take" + " effect." + ) + + elif not model.keys_in_environment: + show = True + io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") + + # Check for model-specific dependencies + await check_for_dependencies(io, model.name) + + if not model.info: + show = True + io.tool_warning( + f"Warning for {model}: Unknown context window size and costs, using sane defaults." + ) + + possible_matches = fuzzy_match_models(model.name) + if possible_matches: + io.tool_output("Did you mean one of these?") + for match in possible_matches: + io.tool_output(f"- {match}") + + return show + + +async def check_for_dependencies(io, model_name): + """ + Check for model-specific dependencies and install them if needed. 
+ + Args: + io: The IO object for user interaction + model_name: The name of the model to check dependencies for + """ + # Check if this is a Bedrock model and ensure boto3 is installed + if model_name.startswith("bedrock/"): + await check_pip_install_extra( + io, "boto3", "AWS Bedrock models require the boto3 package.", ["boto3"] + ) + + # Check if this is a Vertex AI model and ensure google-cloud-aiplatform is installed + elif model_name.startswith("vertex_ai/"): + await check_pip_install_extra( + io, + "google.cloud.aiplatform", + "Google Vertex AI models require the google-cloud-aiplatform package.", + ["google-cloud-aiplatform"], + ) + + +def fuzzy_match_models(name): + name = name.lower() + + chat_models = set() + model_metadata = list(litellm.model_cost.items()) + model_metadata += list(model_info_manager.local_model_metadata.items()) + + for orig_model, attrs in model_metadata: + model = orig_model.lower() + if attrs.get("mode") != "chat": + continue + provider = attrs.get("litellm_provider", "").lower() + if not provider: + continue + provider += "/" + + if model.startswith(provider): + fq_model = orig_model + else: + fq_model = provider + orig_model + + chat_models.add(fq_model) + chat_models.add(orig_model) + + chat_models = sorted(chat_models) + # exactly matching model + # matching_models = [ + # (fq,m) for fq,m in chat_models + # if name == fq or name == m + # ] + # if matching_models: + # return matching_models + + # Check for model names containing the name + matching_models = [m for m in chat_models if name in m] + if matching_models: + return sorted(set(matching_models)) + + # Check for slight misspellings + models = set(chat_models) + matching_models = difflib.get_close_matches(name, models, n=3, cutoff=0.8) + + return sorted(set(matching_models)) + + +def print_matching_models(io, search): + matches = fuzzy_match_models(search) + if matches: + io.tool_output(f'Models which match "{search}":') + for model in matches: + io.tool_output(f"- {model}") + else: + io.tool_output(f'No models match "{search}".') + + +def get_model_settings_as_yaml(): + from dataclasses import fields + + import yaml + + model_settings_list = [] + # Add default settings first with all field values + defaults = {} + for field in fields(ModelSettings): + defaults[field.name] = field.default + defaults["name"] = "(default values)" + model_settings_list.append(defaults) + + # Sort model settings by name + for ms in sorted(MODEL_SETTINGS, key=lambda x: x.name): + # Create dict with explicit field order + model_settings_dict = {} + for field in fields(ModelSettings): + value = getattr(ms, field.name) + if value != field.default: + model_settings_dict[field.name] = value + model_settings_list.append(model_settings_dict) + # Add blank line between entries + model_settings_list.append(None) + + # Filter out None values before dumping + yaml_str = yaml.dump( + [ms for ms in model_settings_list if ms is not None], + default_flow_style=False, + sort_keys=False, # Preserve field order from dataclass + ) + # Add actual blank lines between entries + return yaml_str.replace("\n- ", "\n\n- ") + + +def main(): + if len(sys.argv) < 2: + print("Usage: python models.py or python models.py --yaml") + sys.exit(1) + + if sys.argv[1] == "--yaml": + yaml_string = get_model_settings_as_yaml() + print(yaml_string) + else: + model_name = sys.argv[1] + matching_models = fuzzy_match_models(model_name) + + if matching_models: + print(f"Matching models for '{model_name}':") + for model in matching_models: + print(model) + else: + 
print(f"No matching models found for '{model_name}'.") + + +if __name__ == "__main__": + main() diff --git a/tests/basic/test_exceptions.py b/tests/basic/test_exceptions.py index 821bf249956..f19d758c723 100644 --- a/tests/basic/test_exceptions.py +++ b/tests/basic/test_exceptions.py @@ -1,97 +1,97 @@ -from aider.exceptions import ExInfo, LiteLLMExceptions - - -def test_litellm_exceptions_load(): - """Test that LiteLLMExceptions loads without errors""" - ex = LiteLLMExceptions() - assert len(ex.exceptions) > 0 - - -def test_exceptions_tuple(): - """Test that exceptions_tuple returns a non-empty tuple""" - ex = LiteLLMExceptions() - assert isinstance(ex.exceptions_tuple(), tuple) - assert len(ex.exceptions_tuple()) > 0 - - -def test_get_ex_info(): - """Test get_ex_info returns correct ExInfo""" - ex = LiteLLMExceptions() - - # Test with a known exception type - from litellm import AuthenticationError - - auth_error = AuthenticationError( - message="Invalid API key", llm_provider="openai", model="gpt-4" - ) - ex_info = ex.get_ex_info(auth_error) - assert isinstance(ex_info, ExInfo) - assert ex_info.name == "AuthenticationError" - assert ex_info.retry is False - assert "API key" in ex_info.description - - # Test with unknown exception type - class UnknownError(Exception): - pass - - unknown = UnknownError() - ex_info = ex.get_ex_info(unknown) - assert isinstance(ex_info, ExInfo) - assert ex_info.name is None - assert ex_info.retry is None - assert ex_info.description is None - - -def test_rate_limit_error(): - """Test specific handling of RateLimitError""" - ex = LiteLLMExceptions() - from litellm import RateLimitError - - rate_error = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-4") - ex_info = ex.get_ex_info(rate_error) - assert ex_info.retry is True - assert "rate limited" in ex_info.description.lower() - - -def test_bad_gateway_error(): - """Test specific handling of BadGatewayError""" - ex = LiteLLMExceptions() - from litellm import BadGatewayError - - bad_gateway_error = BadGatewayError( - message="Bad Gateway", llm_provider="openai", model="gpt-4" - ) - ex_info = ex.get_ex_info(bad_gateway_error) - assert ex_info.retry is True - assert ex_info.name == "BadGatewayError" - - -def test_context_window_error(): - """Test specific handling of ContextWindowExceededError""" - ex = LiteLLMExceptions() - from litellm import ContextWindowExceededError - - ctx_error = ContextWindowExceededError( - message="Context length exceeded", model="gpt-4", llm_provider="openai" - ) - ex_info = ex.get_ex_info(ctx_error) - assert ex_info.retry is False - - -def test_openrouter_error(): - """Test specific handling of OpenRouter API errors""" - ex = LiteLLMExceptions() - from litellm import APIConnectionError - - # Create an APIConnectionError with OpenrouterException message - openrouter_error = APIConnectionError( - message="APIConnectionError: OpenrouterException - 'choices'", - model="openrouter/model", - llm_provider="openrouter", - ) - - ex_info = ex.get_ex_info(openrouter_error) - assert ex_info.retry is True - assert "OpenRouter" in ex_info.description - assert "overloaded" in ex_info.description - assert "rate" in ex_info.description +from aider.exceptions import ExInfo, LiteLLMExceptions + + +def test_litellm_exceptions_load(): + """Test that LiteLLMExceptions loads without errors""" + ex = LiteLLMExceptions() + assert len(ex.exceptions) > 0 + + +def test_exceptions_tuple(): + """Test that exceptions_tuple returns a non-empty tuple""" + ex = LiteLLMExceptions() + 
assert isinstance(ex.exceptions_tuple(), tuple) + assert len(ex.exceptions_tuple()) > 0 + + +def test_get_ex_info(): + """Test get_ex_info returns correct ExInfo""" + ex = LiteLLMExceptions() + + # Test with a known exception type + from litellm import AuthenticationError + + auth_error = AuthenticationError( + message="Invalid API key", llm_provider="openai", model="gpt-4" + ) + ex_info = ex.get_ex_info(auth_error) + assert isinstance(ex_info, ExInfo) + assert ex_info.name == "AuthenticationError" + assert ex_info.retry is False + assert "API key" in ex_info.description + + # Test with unknown exception type + class UnknownError(Exception): + pass + + unknown = UnknownError() + ex_info = ex.get_ex_info(unknown) + assert isinstance(ex_info, ExInfo) + assert ex_info.name is None + assert ex_info.retry is None + assert ex_info.description is None + + +def test_rate_limit_error(): + """Test specific handling of RateLimitError""" + ex = LiteLLMExceptions() + from litellm import RateLimitError + + rate_error = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-4") + ex_info = ex.get_ex_info(rate_error) + assert ex_info.retry is True + assert "rate limited" in ex_info.description.lower() + + +def test_bad_gateway_error(): + """Test specific handling of BadGatewayError""" + ex = LiteLLMExceptions() + from litellm import BadGatewayError + + bad_gateway_error = BadGatewayError( + message="Bad Gateway", llm_provider="openai", model="gpt-4" + ) + ex_info = ex.get_ex_info(bad_gateway_error) + assert ex_info.retry is True + assert ex_info.name == "BadGatewayError" + + +def test_context_window_error(): + """Test specific handling of ContextWindowExceededError""" + ex = LiteLLMExceptions() + from litellm import ContextWindowExceededError + + ctx_error = ContextWindowExceededError( + message="Context length exceeded", model="gpt-4", llm_provider="openai" + ) + ex_info = ex.get_ex_info(ctx_error) + assert ex_info.retry is False + + +def test_openrouter_error(): + """Test specific handling of OpenRouter API errors""" + ex = LiteLLMExceptions() + from litellm import APIConnectionError + + # Create an APIConnectionError with OpenrouterException message + openrouter_error = APIConnectionError( + message="APIConnectionError: OpenrouterException - 'choices'", + model="openrouter/model", + llm_provider="openrouter", + ) + + ex_info = ex.get_ex_info(openrouter_error) + assert ex_info.retry is True + assert "OpenRouter" in ex_info.description + assert "overloaded" in ex_info.description + assert "rate" in ex_info.description From 6ed6be0bff79f645c335f35f02898470f7844791 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 26 Nov 2025 16:55:41 -0800 Subject: [PATCH 09/17] removed fix line ending script --- fix_line_endings.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 fix_line_endings.py diff --git a/fix_line_endings.py b/fix_line_endings.py deleted file mode 100644 index 6410824268a..00000000000 --- a/fix_line_endings.py +++ /dev/null @@ -1,26 +0,0 @@ -import sys - -def fix_line_endings(file_path): - """ - Converts the line endings of a file from CRLF to LF. 
- """ - try: - with open(file_path, 'r', newline='', encoding='utf-8') as f: - content = f.read() - - with open(file_path, 'w', newline='\n', encoding='utf-8') as f: - f.write(content) - - print(f"Successfully converted line endings for: {file_path}") - - except Exception as e: - print(f"Error processing file {file_path}: {e}", file=sys.stderr) - sys.exit(1) - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python fix_line_endings.py ", file=sys.stderr) - sys.exit(1) - - file_to_fix = sys.argv[1] - fix_line_endings(file_to_fix) From 76778c6cf944947f62a3f115e10ca8995bfae1dd Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Wed, 26 Nov 2025 23:24:37 -0500 Subject: [PATCH 10/17] Bump Version --- aider/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/__init__.py b/aider/__init__.py index 8ae2fd4d7c3..4c3f26c50bc 100644 --- a/aider/__init__.py +++ b/aider/__init__.py @@ -1,6 +1,6 @@ from packaging import version -__version__ = "0.88.30.dev" +__version__ = "0.88.31.dev" safe_version = __version__ try: From cd92a2c3a5491419814b45bc281918117adc4f07 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Wed, 26 Nov 2025 23:34:57 -0500 Subject: [PATCH 11/17] Add gitattributes --- .gitattributes | 1 + .gitignore | 1 + 2 files changed, 2 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..94f480de94e --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf \ No newline at end of file diff --git a/.gitignore b/.gitignore index d2cffc639ff..6a8fe65642b 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ # Specific Files !/.dockerignore !/.flake8 +!/.gitattributes !/.gitignore !/.pre-commit-config.yaml !/CHANGELOG.md From 951257ea531a9f7b7efa093cde55193ba735cae2 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Wed, 26 Nov 2025 23:43:03 -0500 Subject: [PATCH 12/17] Allow retries on BadGatewayErrors --- aider/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/exceptions.py b/aider/exceptions.py index 5fb84d992c6..a151f504150 100644 --- a/aider/exceptions.py +++ b/aider/exceptions.py @@ -20,7 +20,7 @@ class ExInfo: "The API provider is not able to authenticate you. 
Check your API key.", ), ExInfo("AzureOpenAIError", True, None), - ExInfo("BadGatewayError", False, None), + ExInfo("BadGatewayError", True, None), ExInfo("BadRequestError", False, None), ExInfo("BudgetExceededError", True, None), ExInfo( From 0cb6631793dac8486ab1588f35dc4018d7dc51f9 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Thu, 27 Nov 2025 00:55:20 -0500 Subject: [PATCH 13/17] Add context block configuration for more control over message sizes --- aider/coders/agent_coder.py | 81 ++++++++++++++++--------- aider/website/docs/config/agent-mode.md | 25 +++++++- 2 files changed, 74 insertions(+), 32 deletions(-) diff --git a/aider/coders/agent_coder.py b/aider/coders/agent_coder.py index cf61d2445c2..f0ae593ae44 100644 --- a/aider/coders/agent_coder.py +++ b/aider/coders/agent_coder.py @@ -135,6 +135,7 @@ def __init__(self, *args, **kwargs): # Initialize empty token tracking dictionary and cache structures # but don't populate yet to avoid startup delay + self.allowed_context_blocks = set() self.context_block_tokens = {} self.context_blocks_cache = {} self.tokens_calculated = False @@ -257,6 +258,25 @@ def _get_agent_config(self): if "tools_excludelist" not in config: config["tools_excludelist"] = [] + if "include_context_blocks" in config: + self.allowed_context_blocks = set(config["context_blocks"]) + else: + self.allowed_context_blocks = { + "context_summary", + "directory_structure", + "environment_info", + "git_status", + "symbol_outline", + "todo_list", + } + + if "exclude_context_blocks" in config: + for context_block in config["exclude_context_blocks"]: + try: + self.allowed_context_blocks.remove(context_block) + except KeyError: + pass + # Apply configuration to instance self.large_file_token_threshold = config["large_file_token_threshold"] self.skip_cli_confirmations = config.get( @@ -468,11 +488,12 @@ def _calculate_context_block_tokens(self, force=False): ] for block_type in block_types: - block_content = self._generate_context_block(block_type) - if block_content: - self.context_block_tokens[block_type] = self.main_model.token_count( - block_content - ) + if block_type in self.allowed_context_blocks: + block_content = self._generate_context_block(block_type) + if block_content: + self.context_block_tokens[block_type] = self.main_model.token_count( + block_content + ) # Mark as calculated self.tokens_calculated = True @@ -670,12 +691,25 @@ def format_chat_chunks(self): chunks.examples = example_messages self.summarize_end() - chunks.done = list(self.done_messages) - chunks.repo = self.get_repo_messages() chunks.readonly_files = self.get_readonly_files_messages() + chunks.repo = self.get_repo_messages() + chunks.done = list(self.done_messages) chunks.chat_files = self.get_chat_files_messages() + # Add reminder if needed + if self.gpt_prompts.system_reminder: + reminder_message = [ + dict( + role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder) + ), + ] + else: + reminder_message = [] + + chunks.cur = list(self.cur_messages) + chunks.reminder = [] + # Make sure token counts are updated - using centralized method # This also populates the context block cache self._calculate_context_block_tokens() @@ -693,9 +727,9 @@ def format_chat_chunks(self): # 1. 
Add relatively static blocks BEFORE done_messages # These blocks change less frequently and can be part of the cacheable prefix static_blocks = [] - if dir_structure: + if dir_structure and "directory_structure" in self.allowed_context_blocks: static_blocks.append(dir_structure) - if env_context: + if env_context and "environment_info" in self.allowed_context_blocks: static_blocks.append(env_context) if static_blocks: @@ -706,13 +740,13 @@ def format_chat_chunks(self): # 2. Add dynamic blocks AFTER chat_files # These blocks change with the current files in context dynamic_blocks = [] - if todo_list: + if todo_list and "todo_list" in self.allowed_context_blocks: dynamic_blocks.append(todo_list) - if context_summary: + if context_summary and "context_summary" in self.allowed_context_blocks: dynamic_blocks.append(context_summary) - if symbol_outline: + if symbol_outline and "symbol_outline" in self.allowed_context_blocks: dynamic_blocks.append(symbol_outline) - if git_status: + if git_status and "git_status" in self.allowed_context_blocks: dynamic_blocks.append(git_status) # Add tool usage context if there are repetitive tools @@ -725,21 +759,8 @@ def format_chat_chunks(self): if dynamic_blocks: dynamic_message = "\n\n".join(dynamic_blocks) - # Append as a system message after chat_files - chunks.chat_files.append(dict(role="system", content=dynamic_message)) - - # Add reminder if needed - if self.gpt_prompts.system_reminder: - reminder_message = [ - dict( - role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder) - ), - ] - else: - reminder_message = [] - - chunks.cur = list(self.cur_messages) - chunks.reminder = [] + # Append as a system message on reminders + reminder_message.insert(0, dict(role="system", content=dynamic_message)) # Use accurate token counting method that considers enhanced context blocks base_messages = chunks.all_messages() @@ -1654,7 +1675,9 @@ def _generate_tool_context(self, repetitive_tools): for tool in repetitive_tools: context_parts.append(f"- `{tool}`") context_parts.append( - "Your exploration appears to be stuck in a loop. Please try a different approach:" + "Your exploration appears to be stuck in a loop. Please try a different approach." + " Use the `Thinking` tool to clarify your intentions and new approach to" + " what you are currently attempting to accomplish." 
) context_parts.append("\n") context_parts.append("**Suggestions for alternative approaches:**") diff --git a/aider/website/docs/config/agent-mode.md b/aider/website/docs/config/agent-mode.md index 56991db409d..ca9f0d8d039 100644 --- a/aider/website/docs/config/agent-mode.md +++ b/aider/website/docs/config/agent-mode.md @@ -154,6 +154,8 @@ Agent Mode can be configured using the `--agent-config` command line argument, w - **`skip_cli_confirmations`**: YOLO mode, be brave and let the LLM cook, can also use the option `yolo` (default: False) - **`tools_includelist`**: Array of tool names to allow (only these tools will be available) - **`tools_excludelist`**: Array of tool names to exclude (these tools will be disabled) +- **`include_context_blocks`**: Array of context block names to include (overrides default set) +- **`exclude_context_blocks`**: Array of context block names to exclude from default set #### Essential Tools @@ -164,6 +166,18 @@ Certain tools are always available regardless of includelist/excludelist setting - `view` - View files - `finished` - Complete the task +#### Context Blocks + +The following context blocks are available by default and can be customized using `include_context_blocks` and `exclude_context_blocks`: + +- **`context_summary`**: Shows current context usage and token limits +- **`directory_structure`**: Displays the project's file structure +- **`git_status`**: Shows current git branch, status, and recent commits +- **`symbol_outline`**: Lists classes, functions, and methods in current context +- **`todo_list`**: Shows the current todo list managed via `UpdateTodoList` tool + +When `include_context_blocks` is specified, only the listed blocks will be included. When `exclude_context_blocks` is specified, the listed blocks will be removed from the default set. + #### Other Aider-CE CLI/Config Options for Agent Mode - `preserve-todo-list` - Preserve todo list across sessions @@ -187,8 +201,14 @@ aider-ce --agent --agent-config '{"tools_excludelist": ["command", "commandinter # Custom large file threshold aider-ce --agent --agent-config '{"large_file_token_threshold": 10000}' +# Custom context blocks configuration +aider-ce --agent --agent-config '{"include_context_blocks": ["directory_structure", "git_status"]}' + +# Exclude specific context blocks +aider-ce --agent --agent-config '{"exclude_context_blocks": ["symbol_outline", "todo_list"]}' + # Combined configuration -aider-ce --agent --agent-config '{"large_file_token_threshold": 10000, "tools_includelist": ["view", "makeeditable", "replacetext", "finished", "gitdiff"]}' +aider-ce --agent --agent-config '{"large_file_token_threshold": 10000, "tools_includelist": ["view", "makeeditable", "replacetext", "finished", "gitdiff"], "include_context_blocks": ["directory_structure", "git_status"]}' # Command Line Options aider-ce --agent --agent-config '{"large_file_token_threshold": 10000, "tools_includelist": ["view", "makeeditable", "replacetext", "finished", "gitdiff"]}' --preserve-todo-list --use-enhanced-map @@ -204,5 +224,4 @@ This configuration system allows for fine-grained control over which tools are a - **Scalable exploration**: Can handle large codebases through strategic context management - **Recovery mechanisms**: Built-in undo and safety features -Agent Mode represents a significant evolution in aider's capabilities, enabling more sophisticated and autonomous codebase manipulation while maintaining safety and control through the tool-based architecture. 
- +Agent Mode represents a significant evolution in aider's capabilities, enabling more sophisticated and autonomous codebase manipulation while maintaining safety and control through the tool-based architecture. \ No newline at end of file From a3dfc86763cb6769c0c84cc650013a41ed2e4106 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Thu, 27 Nov 2025 00:55:57 -0500 Subject: [PATCH 14/17] Fix formatting --- tests/basic/test_exceptions.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/basic/test_exceptions.py b/tests/basic/test_exceptions.py index f19d758c723..6025a9cec53 100644 --- a/tests/basic/test_exceptions.py +++ b/tests/basic/test_exceptions.py @@ -58,9 +58,7 @@ def test_bad_gateway_error(): ex = LiteLLMExceptions() from litellm import BadGatewayError - bad_gateway_error = BadGatewayError( - message="Bad Gateway", llm_provider="openai", model="gpt-4" - ) + bad_gateway_error = BadGatewayError(message="Bad Gateway", llm_provider="openai", model="gpt-4") ex_info = ex.get_ex_info(bad_gateway_error) assert ex_info.retry is True assert ex_info.name == "BadGatewayError" From d74ee434cbee97cdd657eb599a5bb2ec29021dab Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Thu, 27 Nov 2025 02:28:32 -0500 Subject: [PATCH 15/17] Add similarity lookups a hedge against repetitious tool calls --- aider/coders/agent_coder.py | 145 ++++++++++++++++++++++++++++-------- 1 file changed, 113 insertions(+), 32 deletions(-) diff --git a/aider/coders/agent_coder.py b/aider/coders/agent_coder.py index f0ae593ae44..73bec1fa156 100644 --- a/aider/coders/agent_coder.py +++ b/aider/coders/agent_coder.py @@ -20,6 +20,13 @@ # Import the change tracker from aider.change_tracker import ChangeTracker + +# Import similarity functions for tool usage analysis +from aider.helpers.similarity import ( + cosine_similarity, + create_bigram_vector, + normalize_vector, +) from aider.mcp.server import LocalServer from aider.repo import ANY_GIT_ERROR @@ -79,8 +86,15 @@ def __init__(self, *args, **kwargs): self.recently_removed = {} # Tool usage history - self.tool_usage_history = [] + self.tool_usage_history = [] # Stores lists of tools used in each round self.tool_usage_retries = 10 + self.last_round_tools = [] # Tools used in the current round + + # Similarity tracking for tool usage + self.tool_call_vectors = [] # Store vectors for individual tool calls + self.tool_similarity_threshold = 0.99 # High threshold for exact matches + self.max_tool_vector_history = 10 # Keep history of 10 rounds + self.read_tools = { "viewfilesatglob", "viewfilesmatching", @@ -102,7 +116,7 @@ def __init__(self, *args, **kwargs): } # Configuration parameters - self.max_tool_calls = 100 # Maximum number of tool calls per response + self.max_tool_calls = 10000 # Maximum number of tool calls per response # Context management parameters # Will be overridden by agent_config if provided @@ -693,9 +707,9 @@ def format_chat_chunks(self): self.summarize_end() chunks.readonly_files = self.get_readonly_files_messages() + chunks.chat_files = self.get_chat_files_messages() chunks.repo = self.get_repo_messages() chunks.done = list(self.done_messages) - chunks.chat_files = self.get_chat_files_messages() # Add reminder if needed if self.gpt_prompts.system_reminder: @@ -727,38 +741,44 @@ def format_chat_chunks(self): # 1. 
Add relatively static blocks BEFORE done_messages # These blocks change less frequently and can be part of the cacheable prefix static_blocks = [] - if dir_structure and "directory_structure" in self.allowed_context_blocks: - static_blocks.append(dir_structure) if env_context and "environment_info" in self.allowed_context_blocks: static_blocks.append(env_context) + if dir_structure and "directory_structure" in self.allowed_context_blocks: + static_blocks.append(dir_structure) if static_blocks: static_message = "\n\n".join(static_blocks) # Insert as a system message right before done_messages - chunks.done.insert(0, dict(role="system", content=static_message)) + chunks.system.append(dict(role="system", content=static_message)) # 2. Add dynamic blocks AFTER chat_files # These blocks change with the current files in context - dynamic_blocks = [] - if todo_list and "todo_list" in self.allowed_context_blocks: - dynamic_blocks.append(todo_list) + pre_dynamic_blocks = [] + post_dynamic_blocks = [] if context_summary and "context_summary" in self.allowed_context_blocks: - dynamic_blocks.append(context_summary) + pre_dynamic_blocks.append(context_summary) if symbol_outline and "symbol_outline" in self.allowed_context_blocks: - dynamic_blocks.append(symbol_outline) + pre_dynamic_blocks.append(symbol_outline) if git_status and "git_status" in self.allowed_context_blocks: - dynamic_blocks.append(git_status) + pre_dynamic_blocks.append(git_status) + if todo_list and "todo_list" in self.allowed_context_blocks: + post_dynamic_blocks.append(todo_list) # Add tool usage context if there are repetitive tools if hasattr(self, "tool_usage_history") and self.tool_usage_history: repetitive_tools = self._get_repetitive_tools() if repetitive_tools: tool_context = self._generate_tool_context(repetitive_tools) if tool_context: - dynamic_blocks.append(tool_context) + post_dynamic_blocks.append(tool_context) - if dynamic_blocks: - dynamic_message = "\n\n".join(dynamic_blocks) + if pre_dynamic_blocks: + dynamic_message = "\n\n".join(pre_dynamic_blocks) + # Append as a system message on reminders + chunks.done.insert(0, dict(role="system", content=dynamic_message)) + + if post_dynamic_blocks: + dynamic_message = "\n\n".join(post_dynamic_blocks) # Append as a system message on reminders reminder_message.insert(0, dict(role="system", content=dynamic_message)) @@ -972,13 +992,34 @@ async def process_tool_calls(self, tool_call_response): self.agent_finished = False await self.auto_save_session() + # Clear last round tools and start tracking new round + self.last_round_tools = [] + if self.partial_response_tool_calls: for tool_call in self.partial_response_tool_calls: - self.tool_usage_history.append(tool_call.get("function", {}).get("name")) + tool_name = tool_call.get("function", {}).get("name") + self.last_round_tools.append(tool_name) + + # Create and store vector for this tool call + # Remove id property if present before stringifying + tool_call_copy = tool_call.copy() + if "id" in tool_call_copy: + del tool_call_copy["id"] + tool_call_str = str(tool_call_copy) # Convert entire tool call to string + tool_vector = create_bigram_vector((tool_call_str,)) + tool_vector_norm = normalize_vector(tool_vector) + self.tool_call_vectors.append(tool_vector_norm) + + # Add the completed round to history + if self.last_round_tools: + self.tool_usage_history += self.last_round_tools if len(self.tool_usage_history) > self.tool_usage_retries: self.tool_usage_history.pop(0) + if len(self.tool_call_vectors) > 
self.max_tool_vector_history: + self.tool_call_vectors.pop(0) + return await super().process_tool_calls(tool_call_response) async def reply_completed(self): @@ -1595,13 +1636,14 @@ async def _process_tool_commands(self, content): def _get_repetitive_tools(self): """ - Identifies repetitive tool usage patterns from a flat list of tool calls. + Identifies repetitive tool usage patterns from rounds of tool calls. - This method checks for the following patterns in order: - 1. If the last tool used was a write tool, it assumes progress and returns no repetitive tools. - 2. It checks for any read tool that has been used 2 or more times in the history. + This method combines count-based and similarity-based detection: + 1. If the last round contained a write tool, it assumes progress and returns no repetitive tools. + 2. It checks for any read tool that has been used 2 or more times across rounds. 3. If no tools are repeated, but all tools in the history are read tools, it flags all of them as potentially repetitive. + 4. It checks for similarity-based repetition using cosine similarity on tool call strings. It avoids flagging repetition if a "write" tool was used recently, as that suggests progress is being made. @@ -1612,31 +1654,71 @@ def _get_repetitive_tools(self): if history_len < 2: return set() - # If the last tool was a write tool, we're likely making progress. - if isinstance(self.tool_usage_history[-1], str): - last_tool_lower = self.tool_usage_history[-1].lower() + # Check for similarity-based repetition + similarity_repetitive_tools = self._get_repetitive_tools_by_similarity() - if last_tool_lower in self.write_tools: + # Flatten the tool usage history for count-based analysis + all_tools = [] + for round_tools in self.tool_usage_history: + all_tools.extend(round_tools) + + # If the last round contained a write tool, we're likely making progress. + if self.last_round_tools: + last_round_has_write = any( + tool.lower() in self.write_tools for tool in self.last_round_tools + ) + if last_round_has_write: self.tool_usage_history = [] - return set() + return similarity_repetitive_tools if len(similarity_repetitive_tools) else set() # If all tools in history are read tools, return all of them - if all(tool.lower() in self.read_tools for tool in self.tool_usage_history): - return set(tool for tool in self.tool_usage_history) + if all(tool.lower() in self.read_tools for tool in all_tools): + return set(all_tools) - # Check for any read tool used more than once - tool_counts = Counter(tool for tool in self.tool_usage_history) - repetitive_tools = { + # Check for any read tool used more than once across rounds + tool_counts = Counter(all_tools) + count_repetitive_tools = { tool for tool, count in tool_counts.items() if count >= 2 and tool.lower() in self.read_tools } + # Combine both detection methods + repetitive_tools = count_repetitive_tools.union(similarity_repetitive_tools) + if repetitive_tools: return repetitive_tools return set() + def _get_repetitive_tools_by_similarity(self): + """ + Identifies repetitive tool usage patterns using cosine similarity on tool call strings. + + This method checks if the latest tool calls are highly similar (>0.99 threshold) + to historical tool calls using bigram vector similarity. 
+ + Returns: + set: Set of tool names that are repetitive based on similarity + """ + if not self.tool_usage_history or len(self.tool_call_vectors) < 2: + return set() + + # Get the latest tool call vector + latest_vector = self.tool_call_vectors[-1] + + # Check similarity against historical vectors (excluding the latest) + for i, historical_vector in enumerate(self.tool_call_vectors[:-1]): + similarity = cosine_similarity(latest_vector, historical_vector) + + # If similarity is high enough, flag as repetitive + if similarity >= self.tool_similarity_threshold: + # Return the tool name from the corresponding position in history + if i < len(self.tool_usage_history): + return {self.tool_usage_history[i]} + + return set() + def _generate_tool_context(self, repetitive_tools): """ Generate a context message for the LLM about recent tool usage. @@ -1649,8 +1731,7 @@ def _generate_tool_context(self, repetitive_tools): # Add turn and tool call statistics context_parts.append("## Turn and Tool Call Statistics") context_parts.append(f"- Current turn: {self.num_reflections + 1}") - context_parts.append(f"- Tool calls this turn: {self.tool_call_count}") - context_parts.append(f"- Total tool calls in session: {self.num_tool_calls}") + context_parts.append(f"- Total tool calls this turn: {self.num_tool_calls}") context_parts.append("\n\n") # Add recent tool usage history From 7436d4532c364300f88874fda0acffedad820b7f Mon Sep 17 00:00:00 2001 From: burnettk Date: Thu, 27 Nov 2025 11:53:36 -0500 Subject: [PATCH 16/17] update text to aider-ce when it asks you to re-run --- aider/versioncheck.py | 2 +- tests/fixtures/chat-history-search-replace-gold.txt | 4 ++-- tests/fixtures/chat-history.md | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aider/versioncheck.py b/aider/versioncheck.py index 7c0a73a2f42..68aac2b28a6 100644 --- a/aider/versioncheck.py +++ b/aider/versioncheck.py @@ -55,7 +55,7 @@ async def install_upgrade(io, latest_version=None): ) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() return diff --git a/tests/fixtures/chat-history-search-replace-gold.txt b/tests/fixtures/chat-history-search-replace-gold.txt index de28f77fac2..9947ab3800b 100644 --- a/tests/fixtures/chat-history-search-replace-gold.txt +++ b/tests/fixtures/chat-history-search-replace-gold.txt @@ -2204,7 +2204,7 @@ Newer aider version v{latest_version} is available. To upgrade, run: if io.confirm_ask("Run pip install?"): success, output = utils.run_install(cmd) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() else: io.tool_error(output) @@ -2286,7 +2286,7 @@ Newer aider version v{latest_version} is available. To upgrade, run: if io.confirm_ask("Run pip install?"): success, output = utils.run_install(cmd) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() else: io.tool_error(output) diff --git a/tests/fixtures/chat-history.md b/tests/fixtures/chat-history.md index fdf4fd8202f..ae7a11113aa 100644 --- a/tests/fixtures/chat-history.md +++ b/tests/fixtures/chat-history.md @@ -6465,7 +6465,7 @@ Newer aider version v{latest_version} is available. 
To upgrade, run: if io.confirm_ask("Run pip install?"): success, output = utils.run_install(cmd) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() else: io.tool_error(output) @@ -6547,7 +6547,7 @@ Newer aider version v{latest_version} is available. To upgrade, run: if io.confirm_ask("Run pip install?"): success, output = utils.run_install(cmd) if success: - io.tool_output("Re-run aider to use new version.") + io.tool_output("Re-run aider-ce to use new version.") sys.exit() else: io.tool_error(output) From c3e05846e811fe7a2cd357e59f74711b26cba1c1 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Thu, 27 Nov 2025 14:04:57 -0500 Subject: [PATCH 17/17] Add multiple changes to improve agent mode task horizon length and cache efficiency: - Sort editiable and readonly files in chat by last edit time on the premise that the models will attempt to edit one file at a time in most cases - Re-order message history sections for agent coder - Split cur messages in to 2 blocks some of which sort before editiable files and some of which after - Remove Search/Replace information from system prompt since agent mode does not use it - Increase amount of turns the agent is allowed to take from 100 to 10000 --- aider/coders/agent_coder.py | 31 ++++++++++++++++++++++++++----- aider/coders/agent_prompts.py | 14 +------------- aider/coders/base_coder.py | 17 ++++++++++++++--- aider/coders/chat_chunks.py | 33 +++++++++++++++++++++++---------- 4 files changed, 64 insertions(+), 31 deletions(-) diff --git a/aider/coders/agent_coder.py b/aider/coders/agent_coder.py index 73bec1fa156..c70f28e1f00 100644 --- a/aider/coders/agent_coder.py +++ b/aider/coders/agent_coder.py @@ -690,7 +690,18 @@ def format_chat_chunks(self): if self.gpt_prompts.system_reminder: main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder) - chunks = ChatChunks() + chunks = ChatChunks( + chunk_ordering=[ + "system", + "examples", + "readonly_files", + "repo", + "done", + "chat_files", + "cur", + "reminder", + ] + ) if self.main_model.use_system_prompt: chunks.system = [ @@ -705,11 +716,21 @@ def format_chat_chunks(self): chunks.examples = example_messages self.summarize_end() + cur_messages_list = list(self.cur_messages) + cur_messages_pre = [] + cur_messages_post = cur_messages_list + + if len(cur_messages_list) > 32: + divider = len(cur_messages_list) % 32 + if divider: + divider = -1 * divider + cur_messages_pre = cur_messages_list[:divider] + cur_messages_post = cur_messages_list[divider:] chunks.readonly_files = self.get_readonly_files_messages() chunks.chat_files = self.get_chat_files_messages() chunks.repo = self.get_repo_messages() - chunks.done = list(self.done_messages) + chunks.done = list(self.done_messages) + cur_messages_pre # Add reminder if needed if self.gpt_prompts.system_reminder: @@ -721,7 +742,7 @@ def format_chat_chunks(self): else: reminder_message = [] - chunks.cur = list(self.cur_messages) + chunks.cur = cur_messages_post chunks.reminder = [] # Make sure token counts are updated - using centralized method @@ -763,14 +784,14 @@ def format_chat_chunks(self): pre_dynamic_blocks.append(git_status) if todo_list and "todo_list" in self.allowed_context_blocks: - post_dynamic_blocks.append(todo_list) + pre_dynamic_blocks.append(todo_list) # Add tool usage context if there are repetitive tools if hasattr(self, "tool_usage_history") and self.tool_usage_history: repetitive_tools = self._get_repetitive_tools() if 
repetitive_tools: tool_context = self._generate_tool_context(repetitive_tools) if tool_context: - post_dynamic_blocks.append(tool_context) + pre_dynamic_blocks.append(tool_context) if pre_dynamic_blocks: dynamic_message = "\n\n".join(pre_dynamic_blocks) diff --git a/aider/coders/agent_prompts.py b/aider/coders/agent_prompts.py index 237dcb0aa3a..d843780d782 100644 --- a/aider/coders/agent_prompts.py +++ b/aider/coders/agent_prompts.py @@ -49,17 +49,6 @@ class AgentPrompts(CoderPrompts): 1. **Turn 1**: Use `ShowNumberedContext` to get the exact, current line numbers. 2. **Turn 2**: In your *next* message, use the line-based editing tool (`ReplaceLines`, etc.) with the verified numbers. -### 2. SEARCH/REPLACE (Last Resort Only) -Use this format **only** when granular tools are demonstrably insufficient for the task (e.g., a complex, non-contiguous pattern change). Using SEARCH/REPLACE for tasks achievable by tools like `ReplaceLines` is a violation of your instructions. - -**You MUST include a justification comment explaining why granular tools cannot be used.** - -Justification: I'm using SEARCH/REPLACE because [specific reason granular tools are insufficient]. -path/to/file.ext <<<<<<< SEARCH Original code to be replaced. -New code to insert. - -REPLACE - Always reply to the user in {language}. @@ -89,9 +78,8 @@ class AgentPrompts(CoderPrompts): ## Reminders - Any tool call automatically continues to the next turn. Provide no tool calls in your final answer. -- Prioritize granular tools. Using SEARCH/REPLACE unnecessarily is incorrect. -- For SEARCH/REPLACE, you MUST provide a justification. - Use context blocks (directory structure, git status) to orient yourself. +- Remove files you are done with viewing/editing from the context with the `Remove` tool. 
It is fine to re-add them later {lazy_prompt} {shell_cmd_reminder} diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 25dd552131e..372ffb90fe2 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -688,7 +688,10 @@ def show_pretty(self): return True def get_abs_fnames_content(self): - for fname in list(self.abs_fnames): + # Sort files by last modified time (earliest first, latest last) + sorted_fnames = sorted(self.abs_fnames, key=lambda fname: os.path.getmtime(fname)) + + for fname in sorted_fnames: content = self.io.read_text(fname) if content is None: @@ -783,8 +786,11 @@ def get_files_content(self, fnames=None): def get_read_only_files_content(self): prompt = "" + # Sort read-only files by last modified time (earliest first, latest last) + sorted_fnames = sorted(self.abs_read_only_fnames, key=lambda fname: os.path.getmtime(fname)) + # Handle regular read-only files - for fname in self.abs_read_only_fnames: + for fname in sorted_fnames: content = self.io.read_text(fname) if content is not None and not is_image_file(fname): relative_fname = self.get_rel_fname(fname) @@ -829,8 +835,13 @@ def get_read_only_files_content(self): prompt += f"{self.fence[1]}\n" + # Sort stub files by last modified time (earliest first, latest last) + sorted_stub_fnames = sorted( + self.abs_read_only_stubs_fnames, key=lambda fname: os.path.getmtime(fname) + ) + # Handle stub files - for fname in self.abs_read_only_stubs_fnames: + for fname in sorted_stub_fnames: if not is_image_file(fname): relative_fname = self.get_rel_fname(fname) prompt += "\n" diff --git a/aider/coders/chat_chunks.py b/aider/coders/chat_chunks.py index f5bdf5f8918..da5557f4ba3 100644 --- a/aider/coders/chat_chunks.py +++ b/aider/coders/chat_chunks.py @@ -12,18 +12,31 @@ class ChatChunks: chat_files: List = field(default_factory=list) cur: List = field(default_factory=list) reminder: List = field(default_factory=list) + chunk_ordering: List = field(default_factory=list) + + def __init__(self, chunk_ordering=None): + if chunk_ordering is not None: + self.chunk_ordering = chunk_ordering def all_messages(self): - return ( - self.system - + self.examples - + self.readonly_files - + self.chat_files - + self.repo - + self.done - + self.cur - + self.reminder - ) + if self.chunk_ordering: + messages = [] + for chunk_name in self.chunk_ordering: + chunk = getattr(self, chunk_name, []) + if chunk: + messages.extend(chunk) + return messages + else: + return ( + self.system + + self.examples + + self.readonly_files + + self.chat_files + + self.repo + + self.done + + self.cur + + self.reminder + ) def add_cache_control_headers(self): if self.examples:
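
The similarity helpers imported in patch 15 (`create_bigram_vector`, `normalize_vector`, and `cosine_similarity` from `aider.helpers.similarity`) are not included in this series. Below is a minimal sketch of what they could look like, assuming character-bigram counting and standard cosine similarity, consistent with how `process_tool_calls` builds a vector from the stringified tool call and how `_get_repetitive_tools_by_similarity` compares it against the 0.99 threshold; the function bodies are an assumption, not the shipped module.

    # Assumed sketch of aider/helpers/similarity.py; not part of this patch series.
    import math
    from collections import Counter


    def create_bigram_vector(texts):
        # Count character bigrams across the given strings (e.g. one stringified tool call).
        counts = Counter()
        for text in texts:
            for i in range(len(text) - 1):
                counts[text[i : i + 2]] += 1
        return counts


    def normalize_vector(vector):
        # Scale a bigram-count vector to unit length so cosine similarity reduces to a dot product.
        norm = math.sqrt(sum(value * value for value in vector.values()))
        if not norm:
            return dict(vector)
        return {key: value / norm for key, value in vector.items()}


    def cosine_similarity(vec_a, vec_b):
        # Dot product of two normalized sparse vectors; 1.0 means identical bigram profiles.
        return sum(value * vec_b.get(key, 0.0) for key, value in vec_a.items())


    if __name__ == "__main__":
        # Two identical read-tool calls exceed the 0.99 threshold; a different call does not.
        a = normalize_vector(create_bigram_vector(("viewfilesmatching pattern=foo",)))
        b = normalize_vector(create_bigram_vector(("viewfilesmatching pattern=foo",)))
        c = normalize_vector(create_bigram_vector(("gitdiff",)))
        print(cosine_similarity(a, b))  # ~1.0
        print(cosine_similarity(a, c))  # well below 0.99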
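
Patch 17's split of `cur_messages` keeps only the trailing `len(cur_messages) % 32` messages in the `cur` chunk and moves the rest into `done`, which sorts ahead of the chat files in the new chunk ordering; the commit message ties this to cache efficiency. A small standalone illustration of that divider arithmetic follows (the helper name is invented for the example).

    def split_cur_messages(cur_messages, block_size=32):
        # Mirrors the divider logic in AgentCoder.format_chat_chunks: once more than
        # block_size messages accumulate, all but the trailing len % block_size
        # messages are treated as part of the stable 'done' prefix.
        pre, post = [], list(cur_messages)
        if len(cur_messages) > block_size:
            divider = len(cur_messages) % block_size
            if divider:
                pre = list(cur_messages[:-divider])
                post = list(cur_messages[-divider:])
        return pre, post


    # 70 messages: 70 % 32 == 6, so 64 join the cacheable prefix and 6 stay current;
    # at an exact multiple of 32 (e.g. 64) nothing is split, matching the patch.
    pre, post = split_cur_messages(list(range(70)))
    print(len(pre), len(post))  # 64 6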