From 3e58c2fe8257512a23931385d56dd8d4b4b33aad Mon Sep 17 00:00:00 2001
From: tarilabs
Date: Mon, 4 May 2026 15:00:47 +0200
Subject: [PATCH 1/2] perf: reuse searcher across batch queries in find_evidence.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In batch mode, ensure_index() was called per query via
retrieve_evidence(), redundantly reloading the SentenceTransformer
model, reopening the Milvus DB, and rebuilding the entire BM25 index
on every iteration (~2-6s overhead each).

Call ensure_index() once before the loop and reuse the returned
searcher for all queries within each batch invocation. This saves
~18-84s in the scope-req-audit step (~10-15 queries) and ~18-174s in
the code-evidence step (~10-30 queries with two-pass retrieval).

Single-query mode is unchanged — it still delegates to
retrieve_evidence().

Co-Authored-By: Claude
Signed-off-by: tarilabs
---
 .../code-evidence/scripts/find_evidence.py    | 66 +++++++++++++++----
 1 file changed, 52 insertions(+), 14 deletions(-)

diff --git a/plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py b/plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py
index 736e9d77..c1ce1fd6 100755
--- a/plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py
+++ b/plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py
@@ -23,6 +23,7 @@
 import argparse
 import json
 import sys
+from pathlib import Path
 
 
 def _parse_filter_paths(raw):
@@ -32,6 +33,47 @@ def _parse_filter_paths(raw):
     return [p.strip() for p in raw.split(",") if p.strip()]
 
 
+def _resolve_filter_paths(repo_path, filter_paths):
+    """Resolve filter paths relative to repo root to match index entries."""
+    if not filter_paths:
+        return None
+    repo_root = Path(repo_path).resolve()
+    return [str((repo_root / p).resolve()) for p in filter_paths]
+
+
+def _format_result(query, filter_paths, repo_path, index_info, results):
+    """Format searcher results into the evidence retrieval output dict."""
+    return {
+        "query": query,
+        "repo_path": repo_path,
+        "result_count": len(results),
+        "index_info": index_info,
+        "results": [
+            {
+                "rank": i + 1,
+                "file_path": r.file_path,
+                "file_name": r.file_name,
+                "start_line": r.start_line,
+                "end_line": r.end_line,
+                "language": r.language,
+                "chunk_type": r.chunk_type,
+                "chunk_name": r.chunk_name,
+                "parent_context": r.parent_context,
+                "signature": r.signature,
+                "docstring": r.docstring,
+                "return_type": r.return_type,
+                "content": r.content,
+                "scores": {
+                    "vector": round(r.vector_score, 4),
+                    "bm25": round(r.bm25_score, 4),
+                    "combined": round(r.combined_score, 4),
+                },
+            }
+            for i, r in enumerate(results)
+        ],
+    }
+
+
 def _run_single(retrieve_evidence, repo, query, limit, filter_paths, reindex):
     """Run a single evidence retrieval and return the result dict."""
     return retrieve_evidence(
@@ -89,6 +131,7 @@ def main():
 
     try:
         from claude_context.skills.evidence_retrieval import retrieve_evidence
+        from claude_context.skills._index_manager import ensure_index
     except ImportError:
         print(
             "Error: code-finder is not installed. Run this script via:\n"
@@ -98,7 +141,7 @@ def main():
         )
         sys.exit(1)
 
-    # Single query mode
+    # Single query mode — use retrieve_evidence directly (one-shot, no reuse needed)
     if args.query:
         filter_paths = _parse_filter_paths(args.filter_paths)
         result = _run_single(
@@ -113,24 +156,19 @@ def main():
         print()
         return
 
-    # Batch mode
+    # Batch mode — call ensure_index once, reuse searcher for all queries
+    searcher, index_info = ensure_index(args.repo, reindex=args.reindex)
+    repo_path = str(Path(args.repo).resolve())
+
     results = []
-    for i, entry in enumerate(queries):
+    for entry in queries:
         query = entry["query"]
         limit = entry.get("limit", args.limit)
         filter_paths = entry.get("filter_paths")
+        resolved = _resolve_filter_paths(repo_path, filter_paths)
 
-        # Only reindex on the first query — subsequent queries reuse the cache
-        reindex = args.reindex and i == 0
-
-        result = _run_single(
-            retrieve_evidence,
-            args.repo,
-            query,
-            limit,
-            filter_paths,
-            reindex,
-        )
+        raw = searcher.search(query=query, limit=limit, filter_paths=resolved)
+        result = _format_result(query, filter_paths, repo_path, index_info, raw)
         results.append({"query": query, "filter_paths": filter_paths, "result": result})
 
     json.dump(results, sys.stdout, indent=2, default=str)

From 5e18ff36eaa7ded597558409e945d93adf5b8579 Mon Sep 17 00:00:00 2001
From: Matteo Mortari
Date: Mon, 4 May 2026 16:48:11 +0200
Subject: [PATCH 2/2] Update plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 .../docs-tools/skills/code-evidence/scripts/find_evidence.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py b/plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py
index c1ce1fd6..d6b30287 100755
--- a/plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py
+++ b/plugins/docs-tools/skills/code-evidence/scripts/find_evidence.py
@@ -130,8 +130,8 @@ def main():
         sys.exit(1)
 
     try:
-        from claude_context.skills.evidence_retrieval import retrieve_evidence
         from claude_context.skills._index_manager import ensure_index
+        from claude_context.skills.evidence_retrieval import retrieve_evidence
     except ImportError:
         print(
             "Error: code-finder is not installed. Run this script via:\n"