diff --git a/refactron/rag/indexer.py b/refactron/rag/indexer.py index 690cfe3..a0b8890 100644 --- a/refactron/rag/indexer.py +++ b/refactron/rag/indexer.py @@ -91,7 +91,7 @@ def __init__( self.parser = CodeParser() def index_repository( - self, repo_path: Optional[Path] = None, summarize: bool = False + self, repo_path: Optional[Path] = None, summarize: bool = False, batch_size: int = 100 ) -> IndexStats: """Index an entire repository. @@ -127,6 +127,8 @@ def index_repository( total_chunks = 0 chunk_type_counts: Dict[str, int] = {} + + current_batch: List[CodeChunk] = [] # Index each file for py_file in python_files: @@ -139,11 +141,20 @@ def index_repository( chunk_type_counts[chunk.chunk_type] = ( chunk_type_counts.get(chunk.chunk_type, 0) + 1 ) + + current_batch.extend(chunks) + + if len(current_batch) >= batch_size: + self.add_chunks(current_batch) + current_batch = [] except Exception as e: # Skip files that can't be parsed print(f"Warning: Could not index {py_file}: {e}") continue + if current_batch: + self.add_chunks(current_batch) + # Save index metadata self._save_metadata( { @@ -188,9 +199,6 @@ def _index_file(self, file_path: Path, summarize: bool = False) -> List[CodeChun except Exception as e: print(f"Warning: AI summarization failed for chunk in {file_path}: {e}") - # Add chunks to the index - self.add_chunks(chunks) - return chunks def _summarize_chunk(self, chunk: CodeChunk) -> Optional[str]: