From 45f80399bc864bc109448c1e4773b5584f87b9b5 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Sun, 19 Oct 2025 14:11:42 -0400 Subject: [PATCH 1/2] Bump Version --- aider/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/__init__.py b/aider/__init__.py index ea0868a76cb..67bd1c17f2a 100644 --- a/aider/__init__.py +++ b/aider/__init__.py @@ -1,6 +1,6 @@ from packaging import version -__version__ = "0.87.12.dev" +__version__ = "0.87.13.dev" safe_version = __version__ try: From 50826578eadb95fde5125d364de93c5a2ea608e0 Mon Sep 17 00:00:00 2001 From: Dustin Washington Date: Sun, 19 Oct 2025 14:14:28 -0400 Subject: [PATCH 2/2] #45 - Add logarithmic equations to moderate repo map weights for page rankings --- README.md | 12 ++++++------ aider/repomap.py | 39 +++++++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 4e4cadcb009..0f905cdfd03 100644 --- a/README.md +++ b/README.md @@ -6,22 +6,22 @@ The current priorities are to improve core capabilities and user experience of t * [x] Refactor codebase to have the main loop run asynchronously * [x] Update test harness to work with new asynchronous methods -2. **Repo Map Accuracy** - * [ ] [Bias page ranking toward active/editable files in repo map parsing](https://github.com/Aider-AI/aider/issues/2405) +2. **Repo Map Accuracy** - [Discussion](https://github.com/dwash96/aider-ce/issues/45) + * [x] [Bias page ranking toward active/editable files in repo map parsing](https://github.com/Aider-AI/aider/issues/2405) + * [ ] [Include AST information in repo map for richer context](https://github.com/Aider-AI/aider/issues/2688) * [ ] [Handle non-unique symbols that break down in large codebases](https://github.com/Aider-AI/aider/issues/2341) - * [ ] [Include AST information in repo map for richer context](https://github.com/Aider-AI/aider/issues/2688) -3. **Context Discovery** +3. 
**Context Discovery** - [Discussion](https://github.com/dwash96/aider-ce/issues/46) * [ ] Develop AST-based search capabilities * [ ] Enhance file search with ripgrep integration * [ ] Implement RAG (Retrieval-Augmented Generation) for better code retrieval * [ ] Build an explicit workflow and local tooling for internal discovery mechanisms -4. **Context Delivery** +4. **Context Delivery** - [Discussion](https://github.com/dwash96/aider-ce/issues/47) * [ ] Use workflow for internal discovery to better target file snippets needed for specific tasks * [ ] Add support for partial files and code snippets in model completion messages -5. **TUI Experience** +5. **TUI Experience** - [Discussion](https://github.com/dwash96/aider-ce/issues/48) * [ ] Add a full TUI (probably using textual) to have a visual interface competitive with the other coding agent terminal programs * [ ] Re-integrate pretty output formatting * [ ] Implement a response area, a prompt area with current auto completion capabilities, and a helper area for management utility commands diff --git a/aider/repomap.py b/aider/repomap.py index 3aee43bd333..b56684363f5 100644 --- a/aider/repomap.py +++ b/aider/repomap.py @@ -612,6 +612,7 @@ def get_ranked_tags( personalization[rel_fname] = current_pers # Assign the final calculated value tags = list(self.get_tags(fname, rel_fname)) + if tags is None: continue @@ -643,7 +644,7 @@ def get_ranked_tags( if ident in references: continue for definer in defines[ident]: - G.add_edge(definer, definer, weight=0.1, ident=ident) + G.add_edge(definer, definer, weight=0.01, ident=ident) for ident in idents: if progress: @@ -658,12 +659,32 @@ def get_ranked_tags( is_camel = any(c.isupper() for c in ident) and any(c.islower() for c in ident) if ident in mentioned_idents: mul *= 10 - if (is_snake or is_kebab or is_camel) and len(ident) >= 8: + + # Prioritize function-like identifiers + if ( + (is_snake or is_kebab or is_camel) + and len(ident) >= 8 + and "test" not in 
ident.lower() + ): mul *= 10 - if ident.startswith("_"): - mul *= 0.1 + + # Downplay repetitive definitions in case of common boilerplate + # Scale down logarithmically given the increasing number of definitions in a codebase + # Ideally, this will help downweight boilerplate in frameworks, interfaces, and abstract classes if len(defines[ident]) > 5: - mul *= 0.1 + mul *= math.log((5 / (len(defines[ident]) ** 2)) + 1) + + # Calculate multiplier: log(number of unique file references * total references ^ 2 + 1) + # Used to balance the number of times an identifier appears with its number of refs per file + # Penetration in codebase is important + # So is the frequency + # And the logarithm keeps them from scaling out of bounds forever + # Combined with the above downweighting + # There should be a push/pull that balances repetitiveness of identifier defs + # With absolute number of references throughout a codebase + unique_file_refs = len(set(references[ident])) + total_refs = len(references[ident]) + ext_mul = math.log(unique_file_refs * total_refs**2 + 1) for referencer, num_refs in Counter(references[ident]).items(): for definer in definers: @@ -671,7 +692,13 @@ def get_ranked_tags( # if referencer == definer: # continue - use_mul = mul + # Only add edge if file extensions match + referencer_ext = Path(referencer).suffix + definer_ext = Path(definer).suffix + if referencer_ext != definer_ext: + continue + + use_mul = mul * ext_mul if referencer in chat_rel_fnames: use_mul *= 50