17 changes: 17 additions & 0 deletions aider/coders/base_coder.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python

import asyncio
import os
import base64
import hashlib
import json
@@ -367,6 +368,7 @@ def __init__(
context_compaction_max_tokens=None,
context_compaction_summary_tokens=8192,
map_cache_dir=".",
repomap_in_memory=False,
):
# initialize from args.map_cache_dir
self.map_cache_dir = map_cache_dir
@@ -555,6 +557,8 @@ def __init__(
map_mul_no_files=map_mul_no_files,
refresh=map_refresh,
max_code_line_length=map_max_line_length,
repo_root=self.root,
use_memory_cache=repomap_in_memory,
)

self.summarizer = summarizer or ChatSummary(
@@ -853,6 +857,19 @@ def get_repo_map(self, force_refresh=False):
mentioned_fnames.update(self.get_ident_filename_matches(mentioned_idents))

all_abs_files = set(self.get_all_abs_files())

# Exclude metadata/docs from repo map inputs to reduce parsing overhead
def _include_in_map(abs_path):
try:
rel = self.get_rel_fname(abs_path)
except Exception:
rel = str(abs_path)
parts = Path(rel).parts
if ".meta" in parts or ".docs" in parts:
return False
return True

all_abs_files = {p for p in all_abs_files if _include_in_map(p)}
repo_abs_read_only_fnames = set(self.abs_read_only_fnames) & all_abs_files
repo_abs_read_only_stubs_fnames = set(self.abs_read_only_stubs_fnames) & all_abs_files
chat_files = (
11 changes: 9 additions & 2 deletions aider/repomap.py
@@ -146,15 +146,22 @@ def __init__(
map_mul_no_files=8,
refresh="auto",
max_code_line_length=100,
repo_root=None,
use_memory_cache=False,
):
self.io = io
self.verbose = verbose
self.refresh = refresh

self.map_cache_dir = map_cache_dir
self.root = os.getcwd()
# Prefer an explicit repo root (e.g. a per-test repo); fall back to CWD
self.root = repo_root or os.getcwd()

self.load_tags_cache()
# Allow opting into an in-memory tags cache to avoid disk/SQLite locks
if use_memory_cache:
self.TAGS_CACHE = dict()
else:
self.load_tags_cache()
self.cache_threshold = 0.95

self.max_map_tokens = map_tokens
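A minimal sketch of how a caller could opt into the new in-memory cache, using only the keyword names visible in this diff (`io`, `repo_root`, `use_memory_cache`); the path and the `InputOutput` setup are placeholders:

from aider.io import InputOutput
from aider.repomap import RepoMap

io = InputOutput(yes=True)
repo_map = RepoMap(
    io=io,
    repo_root="/path/to/repo",  # explicit root instead of falling back to os.getcwd()
    use_memory_cache=True,      # plain dict cache; no on-disk SQLite cache to lock
)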
1 change: 1 addition & 0 deletions benchmark/README.md
@@ -83,6 +83,7 @@ You can run `./benchmark/benchmark.py --help` for a list of all the arguments, b
- `--num-tests` specifies how many of the tests to run before stopping. This is another way to start gently as you debug your benchmarking setup.
- `--keywords` filters the tests to run to only the ones whose name match the supplied argument (similar to `pytest -k xxxx`).
- `--read-model-settings=<filename.yml>` specify model settings, see here: https://aider.chat/docs/config/adv-model-settings.html#model-settings
- `--map-tokens` sets the token budget for the repo map sent with each request; set it to `0` to disable the repo map. This lets you enable the repo map for any model; see the example below.
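  For example, an illustrative invocation (the run name, model, and token budget are placeholders): `./benchmark/benchmark.py a-run-name --model gpt-4o --edit-format diff --map-tokens 1024`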

### Benchmark report

137 changes: 106 additions & 31 deletions benchmark/benchmark.py
@@ -15,20 +15,24 @@
from types import SimpleNamespace
from typing import List, Optional

import git
import importlib_resources
import lox
import pandas as pd
import prompts
"""
Performance-oriented refactors:
- Avoid heavy imports unless needed for a given code path.
- Fast path for `--stats` to skip GitPython and benchmarking deps.
- Build DataFrame / import plotting only when `--graphs` is true.
- Use json.load for result file parsing to reduce memory churn.
- Cache git version lookups across a single invocation.
"""

# Heavy modules are lazily imported within the code paths that need them.
import typer
from dotenv import load_dotenv
from plots import plot_refactoring
from rich.console import Console

from aider import models, sendchat
from aider.coders import Coder, base_coder
from aider.dump import dump # noqa: F401
from aider.io import InputOutput

# Cache for commit-hash -> version lookup
_VERSION_CACHE = {}

BENCHMARK_DNAME = Path(os.environ.get("AIDER_BENCHMARK_DIR", "tmp.benchmarks"))

@@ -122,11 +126,12 @@ def show_stats(dirnames, graphs, stats_languages=None):

repeat_hi = repeat_lo = repeat_avg = None # noqa: F841

df = pd.DataFrame.from_records(rows)
# df.sort_values(by=["model", "edit_format"], inplace=True)

# dump(df)
# Only build a DataFrame and import plotting libs when graphs are requested
if graphs:
import pandas as pd # Lazy import
from plots import plot_refactoring # Lazy import

df = pd.DataFrame.from_records(rows)
# plot_timing(df)
# plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg)
# plot_outcomes_claude(df)
@@ -212,15 +217,15 @@ def main(
thinking_tokens: Optional[int] = typer.Option(
None, "--thinking-tokens", help="Set thinking tokens for models that support it"
),
map_tokens: Optional[int] = typer.Option(
None,
"--map-tokens",
help="Suggested number of tokens for repo map (0 to disable)",
),
exercises_dir: str = typer.Option(
EXERCISES_DIR_DEFAULT, "--exercises-dir", help="Directory with exercise files"
),
):
repo = git.Repo(search_parent_directories=True)
commit_hash = repo.head.object.hexsha[:7]
if repo.is_dirty():
commit_hash += "-dirty"

if stats_only and not dirnames:
latest_dir = find_latest_benchmark_dir()
dirnames = [str(latest_dir)]
@@ -241,6 +246,7 @@ def main(
updated_dirnames.append(dirname)

if stats_only:
# Fast path: avoid importing/initializing benchmarking deps
return show_stats(updated_dirnames, graphs, stats_languages)

if diffs_only:
@@ -249,6 +255,18 @@ def main(
assert len(updated_dirnames) == 1, updated_dirnames
dirname = updated_dirnames[0]

# Lazy imports for the actual benchmark run
import git # Heavy; avoid for --stats/--diffs
import importlib_resources # Used for model metadata registration
import lox # Only needed for threaded runs
from aider import models, sendchat
from aider.coders import base_coder

repo = git.Repo(search_parent_directories=True)
commit_hash = repo.head.object.hexsha[:7]
if repo.is_dirty():
commit_hash += "-dirty"

if "AIDER_DOCKER" not in os.environ:
print("Warning: benchmarking runs unvetted code from GPT, run in a docker container")
return
@@ -350,6 +368,9 @@ def get_exercise_dirs(base_dir, languages=None):
base_coder.RETRY_TIMEOUT = LONG_TIMEOUT
models.RETRY_TIMEOUT = LONG_TIMEOUT

# Enable in-memory RepoMap cache when running multiple threads to avoid SQLite contention
repomap_in_memory = threads > 1

if threads == 1:
all_results = []
for test_path in test_dnames:
@@ -370,6 +391,8 @@ def get_exercise_dirs(base_dir, languages=None):
sleep,
reasoning_effort,
thinking_tokens,
map_tokens,
repomap_in_memory,
)

all_results.append(results)
@@ -396,6 +419,8 @@ def get_exercise_dirs(base_dir, languages=None):
sleep,
reasoning_effort,
thinking_tokens,
map_tokens,
repomap_in_memory,
)
all_results = run_test_threaded.gather(tqdm=True)

@@ -457,7 +482,8 @@ def load_results(dirname, stats_languages=None):
for pattern in glob_patterns:
for fname in dirname.glob(pattern):
try:
results = json.loads(fname.read_text())
with open(fname, "r", encoding="utf-8", errors="replace") as f:
results = json.load(f)
all_results.append(results)
except json.JSONDecodeError:
print("json.JSONDecodeError", fname)
@@ -497,6 +523,7 @@ def summarize_results(dirname, stats_languages=None):

res.reasoning_effort = None
res.thinking_tokens = None
res.map_tokens = None
variants = defaultdict(set)

for results in all_results:
@@ -530,6 +557,7 @@

res.reasoning_effort = results.get("reasoning_effort")
res.thinking_tokens = results.get("thinking_tokens")
res.map_tokens = results.get("map_tokens")

for key in "model edit_format commit_hash editor_model editor_edit_format".split():
val = results.get(key)
@@ -578,6 +606,8 @@ def show(stat, red="red"):
print(f" reasoning_effort: {res.reasoning_effort}")
if res.thinking_tokens is not None:
print(f" thinking_tokens: {res.thinking_tokens}")
if res.map_tokens is not None:
print(f" map_tokens: {res.map_tokens}")

for i in range(tries):
print(f" pass_rate_{i + 1}: {percents[i]:.1f}")
@@ -602,7 +632,7 @@ def show(stat, red="red"):

if variants["model"]:
a_model = set(variants["model"]).pop()
command = f"aider --model {a_model}"
command = f"aider-ce --model {a_model}"
print(f" command: {command}")

print(f" date: {date}")
@@ -634,14 +664,24 @@ def get_versions(commit_hashes):
for hsh in commit_hashes:
if not hsh:
continue
hsh = hsh.split("-")[0]
short = hsh.split("-")[0]
if short in _VERSION_CACHE:
ver = _VERSION_CACHE.get(short)
if ver:
versions.add(ver)
continue

try:
version = subprocess.check_output(
["git", "show", f"{hsh}:aider/__init__.py"], universal_newlines=True
version_src = subprocess.check_output(
["git", "show", f"{short}:aider/__init__.py"], universal_newlines=True
)
version = re.search(r'__version__ = "(.*)"', version).group(1)
versions.add(version)
match = re.search(r'__version__ = "(.*)"', version_src)
ver = match.group(1) if match else None
_VERSION_CACHE[short] = ver
if ver:
versions.add(ver)
except subprocess.CalledProcessError:
_VERSION_CACHE[short] = None
pass
return versions

@@ -693,8 +733,17 @@ def run_test_real(
sleep=0,
reasoning_effort: Optional[str] = None,
thinking_tokens: Optional[int] = None,
map_tokens: Optional[int] = None,
read_model_settings=None,
repomap_in_memory: bool = False,
):
# Lazy imports: only needed in the actual benchmark execution path
from aider.io import InputOutput
from aider.coders import Coder
from aider import models
import prompts
import git

if not os.path.isdir(testdir):
print("Not a dir:", testdir)
return
@@ -818,20 +867,45 @@ def run_test_real(
dump(edit_format)
show_fnames = ",".join(map(str, fnames))
print("fnames:", show_fnames)

coder = Coder.create(
main_model,
edit_format,
io,
# Ensure this test directory is a standalone git repo so RepoMap can be used
try:
git_dir = testdir / ".git"
if not git_dir.exists():
r = git.Repo.init(testdir)
# Set a local identity to avoid commit failures in clean containers
with r.config_writer() as cw:
cw.set_value("user", "name", "aider-benchmark")
cw.set_value("user", "email", "aider-benchmark@example.com")
# Add existing files (solution set and any current files), skipping .git internals
r.index.add([str(p.relative_to(testdir)) for p in testdir.rglob("*") if p.is_file() and ".git" not in p.relative_to(testdir).parts])
r.index.commit("Initial commit for aider benchmark")
except Exception as e:
if verbose:
print(f"Warning: failed to initialize git repo in {testdir}: {e}")

coder_kwargs = dict(
main_model=main_model,
edit_format=edit_format,
io=io,
fnames=fnames,
use_git=False,
use_git=True,
auto_commits=False,
dirty_commits=False,
stream=False,
verbose=verbose,
# auto_lint=False, # disabled for code-in-json experiments
cache_prompts=True,
suggest_shell_commands=False,
ignore_mentions=ignore_files,
# Reduce repo map contention and size for benchmarks
map_cache_dir=str(testdir),
repomap_in_memory=repomap_in_memory,
map_mul_no_files=4,
)
if map_tokens is not None:
coder_kwargs["map_tokens"] = map_tokens

coder = Coder.create(**coder_kwargs)
dump(coder.ignore_mentions)

coder.show_announcements()
@@ -960,6 +1034,7 @@ def run_test_real(
prompt_tokens=coder.total_tokens_sent,
completion_tokens=coder.total_tokens_received,
thinking_tokens=thinking_tokens,
map_tokens=map_tokens,
chat_hashes=list(
zip(
coder.chat_completion_call_hashes,