diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 45038cd14e2..9b646254b88 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import asyncio +import os import base64 import hashlib import json @@ -367,6 +368,7 @@ def __init__( context_compaction_max_tokens=None, context_compaction_summary_tokens=8192, map_cache_dir=".", + repomap_in_memory=False, ): # initialize from args.map_cache_dir self.map_cache_dir = map_cache_dir @@ -555,6 +557,8 @@ def __init__( map_mul_no_files=map_mul_no_files, refresh=map_refresh, max_code_line_length=map_max_line_length, + repo_root=self.root, + use_memory_cache=repomap_in_memory, ) self.summarizer = summarizer or ChatSummary( @@ -853,6 +857,19 @@ def get_repo_map(self, force_refresh=False): mentioned_fnames.update(self.get_ident_filename_matches(mentioned_idents)) all_abs_files = set(self.get_all_abs_files()) + + # Exclude metadata/docs from repo map inputs to reduce parsing overhead + def _include_in_map(abs_path): + try: + rel = self.get_rel_fname(abs_path) + except Exception: + rel = str(abs_path) + parts = Path(rel).parts + if ".meta" in parts or ".docs" in parts: + return False + return True + + all_abs_files = {p for p in all_abs_files if _include_in_map(p)} repo_abs_read_only_fnames = set(self.abs_read_only_fnames) & all_abs_files repo_abs_read_only_stubs_fnames = set(self.abs_read_only_stubs_fnames) & all_abs_files chat_files = ( diff --git a/aider/repomap.py b/aider/repomap.py index e96ed0446fe..3aee43bd333 100644 --- a/aider/repomap.py +++ b/aider/repomap.py @@ -146,15 +146,22 @@ def __init__( map_mul_no_files=8, refresh="auto", max_code_line_length=100, + repo_root=None, + use_memory_cache=False, ): self.io = io self.verbose = verbose self.refresh = refresh self.map_cache_dir = map_cache_dir - self.root = os.getcwd() + # Prefer an explicit repo root (eg per-test repo), fallback to CWD + self.root = repo_root or os.getcwd() - 
self.load_tags_cache() + # Allow opting into an in-memory tags cache to avoid disk/SQLite locks + if use_memory_cache: + self.TAGS_CACHE = dict() + else: + self.load_tags_cache() self.cache_threshold = 0.95 self.max_map_tokens = map_tokens diff --git a/benchmark/README.md b/benchmark/README.md index 7765c00b79c..988406de687 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -83,6 +83,7 @@ You can run `./benchmark/benchmark.py --help` for a list of all the arguments, b - `--num-tests` specifies how many of the tests to run before stopping. This is another way to start gently as you debug your benchmarking setup. - `--keywords` filters the tests to run to only the ones whose name match the supplied argument (similar to `pytest -k xxxx`). - `--read-model-settings=` specify model settings, see here: https://aider.chat/docs/config/adv-model-settings.html#model-settings +- `--map-tokens` sets a token budget for the repo map sent with each request. Set `0` to disable the repo map. This lets you enable repo map usage for any model (e.g., `--map-tokens 1024`). ### Benchmark report diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index cc694a9f70d..180d3b4f04e 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -15,20 +15,24 @@ from types import SimpleNamespace from typing import List, Optional -import git -import importlib_resources -import lox -import pandas as pd -import prompts +""" +Performance-oriented refactors: +- Avoid heavy imports unless needed for a given code path. +- Fast path for `--stats` to skip GitPython and benchmarking deps. +- Build DataFrame / import plotting only when `--graphs` is true. +- Use json.load for result file parsing to reduce memory churn. +- Cache git version lookups across a single invocation. +""" + +# Heavy modules are lazily imported within the code paths that need them. 
import typer from dotenv import load_dotenv -from plots import plot_refactoring from rich.console import Console -from aider import models, sendchat -from aider.coders import Coder, base_coder from aider.dump import dump # noqa: F401 -from aider.io import InputOutput + +# Cache for commit-hash -> version lookup +_VERSION_CACHE = {} BENCHMARK_DNAME = Path(os.environ.get("AIDER_BENCHMARK_DIR", "tmp.benchmarks")) @@ -122,11 +126,12 @@ def show_stats(dirnames, graphs, stats_languages=None): repeat_hi = repeat_lo = repeat_avg = None # noqa: F841 - df = pd.DataFrame.from_records(rows) - # df.sort_values(by=["model", "edit_format"], inplace=True) - - # dump(df) + # Only build a DataFrame and import plotting libs when graphs are requested if graphs: + import pandas as pd # Lazy import + from plots import plot_refactoring # Lazy import + + df = pd.DataFrame.from_records(rows) # plot_timing(df) # plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg) # plot_outcomes_claude(df) @@ -212,15 +217,15 @@ def main( thinking_tokens: Optional[int] = typer.Option( None, "--thinking-tokens", help="Set thinking tokens for models that support it" ), + map_tokens: Optional[int] = typer.Option( + None, + "--map-tokens", + help="Suggested number of tokens for repo map (0 to disable)", + ), exercises_dir: str = typer.Option( EXERCISES_DIR_DEFAULT, "--exercises-dir", help="Directory with exercise files" ), ): - repo = git.Repo(search_parent_directories=True) - commit_hash = repo.head.object.hexsha[:7] - if repo.is_dirty(): - commit_hash += "-dirty" - if stats_only and not dirnames: latest_dir = find_latest_benchmark_dir() dirnames = [str(latest_dir)] @@ -241,6 +246,7 @@ def main( updated_dirnames.append(dirname) if stats_only: + # Fast path: avoid importing/initializing benchmarking deps return show_stats(updated_dirnames, graphs, stats_languages) if diffs_only: @@ -249,6 +255,18 @@ def main( assert len(updated_dirnames) == 1, updated_dirnames dirname = updated_dirnames[0] + # Lazy 
imports for the actual benchmark run + import git # Heavy; avoid for --stats/--diffs + import importlib_resources # Used for model metadata registration + import lox # Only needed for threaded runs + from aider import models, sendchat + from aider.coders import base_coder + + repo = git.Repo(search_parent_directories=True) + commit_hash = repo.head.object.hexsha[:7] + if repo.is_dirty(): + commit_hash += "-dirty" + if "AIDER_DOCKER" not in os.environ: print("Warning: benchmarking runs unvetted code from GPT, run in a docker container") return @@ -350,6 +368,9 @@ def get_exercise_dirs(base_dir, languages=None): base_coder.RETRY_TIMEOUT = LONG_TIMEOUT models.RETRY_TIMEOUT = LONG_TIMEOUT + # Enable in-memory RepoMap cache when running multiple threads to avoid SQLite contention + repomap_in_memory = threads > 1 + if threads == 1: all_results = [] for test_path in test_dnames: @@ -370,6 +391,8 @@ def get_exercise_dirs(base_dir, languages=None): sleep, reasoning_effort, thinking_tokens, + map_tokens, + repomap_in_memory=repomap_in_memory, ) all_results.append(results) @@ -396,6 +419,8 @@ def get_exercise_dirs(base_dir, languages=None): sleep, reasoning_effort, thinking_tokens, + map_tokens, + repomap_in_memory=repomap_in_memory, ) all_results = run_test_threaded.gather(tqdm=True) @@ -457,7 +482,8 @@ def load_results(dirname, stats_languages=None): for pattern in glob_patterns: for fname in dirname.glob(pattern): try: - results = json.loads(fname.read_text()) + with open(fname, "r", encoding="utf-8", errors="replace") as f: + results = json.load(f) all_results.append(results) except json.JSONDecodeError: print("json.JSONDecodeError", fname) @@ -497,6 +523,7 @@ def summarize_results(dirname, stats_languages=None): res.reasoning_effort = None res.thinking_tokens = None + res.map_tokens = None variants = defaultdict(set) for results in all_results: @@ -530,6 +557,7 @@ def summarize_results(dirname, stats_languages=None): res.reasoning_effort = results.get("reasoning_effort") res.thinking_tokens = 
results.get("thinking_tokens") + res.map_tokens = results.get("map_tokens") for key in "model edit_format commit_hash editor_model editor_edit_format".split(): val = results.get(key) @@ -578,6 +606,8 @@ def show(stat, red="red"): print(f" reasoning_effort: {res.reasoning_effort}") if res.thinking_tokens is not None: print(f" thinking_tokens: {res.thinking_tokens}") + if res.map_tokens is not None: + print(f" map_tokens: {res.map_tokens}") for i in range(tries): print(f" pass_rate_{i + 1}: {percents[i]:.1f}") @@ -602,7 +632,7 @@ def show(stat, red="red"): if variants["model"]: a_model = set(variants["model"]).pop() - command = f"aider --model {a_model}" + command = f"aider-ce --model {a_model}" print(f" command: {command}") print(f" date: {date}") @@ -634,14 +664,24 @@ def get_versions(commit_hashes): for hsh in commit_hashes: if not hsh: continue - hsh = hsh.split("-")[0] + short = hsh.split("-")[0] + if short in _VERSION_CACHE: + ver = _VERSION_CACHE.get(short) + if ver: + versions.add(ver) + continue + try: - version = subprocess.check_output( - ["git", "show", f"{hsh}:aider/__init__.py"], universal_newlines=True + version_src = subprocess.check_output( + ["git", "show", f"{short}:aider/__init__.py"], universal_newlines=True ) - version = re.search(r'__version__ = "(.*)"', version).group(1) - versions.add(version) + match = re.search(r'__version__ = "(.*)"', version_src) + ver = match.group(1) if match else None + _VERSION_CACHE[short] = ver + if ver: + versions.add(ver) except subprocess.CalledProcessError: + _VERSION_CACHE[short] = None pass return versions @@ -693,8 +733,17 @@ def run_test_real( sleep=0, reasoning_effort: Optional[str] = None, thinking_tokens: Optional[int] = None, + map_tokens: Optional[int] = None, read_model_settings=None, + repomap_in_memory: bool = False, ): + # Lazy imports: only needed in the actual benchmark execution path + from aider.io import InputOutput + from aider.coders import Coder + from aider import models + import prompts + 
import git + if not os.path.isdir(testdir): print("Not a dir:", testdir) return @@ -818,20 +867,45 @@ def run_test_real( dump(edit_format) show_fnames = ",".join(map(str, fnames)) print("fnames:", show_fnames) - - coder = Coder.create( - main_model, - edit_format, - io, + # Ensure this test directory is a standalone git repo so RepoMap can be used + try: + git_dir = testdir / ".git" + if not git_dir.exists(): + r = git.Repo.init(testdir) + # Set a local identity to avoid commit failures in clean containers + with r.config_writer() as cw: + cw.set_value("user", "name", "aider-benchmark") + cw.set_value("user", "email", "aider-benchmark@example.com") + # Add existing files (solution set and any current files), skipping git's own .git/ metadata + r.index.add([str(p.relative_to(testdir)) for p in testdir.rglob("*") if p.is_file() and ".git" not in p.relative_to(testdir).parts]) + r.index.commit("Initial commit for aider benchmark") + except Exception as e: + if verbose: + print(f"Warning: failed to initialize git repo in {testdir}: {e}") + + coder_kwargs = dict( + main_model=main_model, + edit_format=edit_format, + io=io, fnames=fnames, - use_git=False, + use_git=True, + auto_commits=False, + dirty_commits=False, stream=False, verbose=verbose, # auto_lint=False, # disabled for code-in-json experiments cache_prompts=True, suggest_shell_commands=False, ignore_mentions=ignore_files, + # Reduce repo map contention and size for benchmarks + map_cache_dir=str(testdir), + repomap_in_memory=repomap_in_memory, + map_mul_no_files=4, ) + if map_tokens is not None: + coder_kwargs["map_tokens"] = map_tokens + + coder = Coder.create(**coder_kwargs) dump(coder.ignore_mentions) coder.show_announcements() @@ -960,6 +1034,7 @@ def run_test_real( prompt_tokens=coder.total_tokens_sent, completion_tokens=coder.total_tokens_received, thinking_tokens=thinking_tokens, + map_tokens=map_tokens, chat_hashes=list( zip( coder.chat_completion_call_hashes,