diff --git a/refactron/analysis/symbol_table.py b/refactron/analysis/symbol_table.py index de9bbf1..dcb3b5d 100644 --- a/refactron/analysis/symbol_table.py +++ b/refactron/analysis/symbol_table.py @@ -3,12 +3,13 @@ Maps classes, functions, variables, and their relationships across the codebase. """ +import hashlib import json import logging from dataclasses import dataclass, field from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Set from refactron.core.inference import InferenceEngine @@ -61,16 +62,27 @@ class SymbolTable: symbols: Dict[str, Dict[str, Dict[str, Symbol]]] = field(default_factory=dict) # Map: global_name -> Symbol (for easy cross-file lookup of exports) exports: Dict[str, Symbol] = field(default_factory=dict) + # Map: file_path -> { "mtime": float, "size": int, "sha256": str } + file_metadata: Dict[str, Dict[str, Any]] = field(default_factory=dict) + + @staticmethod + def _normalize_path(path: str) -> str: + """Standardize path format for consistent keys/storage.""" + return Path(path).resolve().as_posix() def add_symbol(self, symbol: Symbol) -> None: """Add a symbol to the table.""" - if symbol.file_path not in self.symbols: - self.symbols[symbol.file_path] = {} + path = self._normalize_path(symbol.file_path) + # Ensure the symbol itself stores the normalized path + symbol.file_path = path + + if path not in self.symbols: + self.symbols[path] = {} - if symbol.scope not in self.symbols[symbol.file_path]: - self.symbols[symbol.file_path][symbol.scope] = {} + if symbol.scope not in self.symbols[path]: + self.symbols[path][symbol.scope] = {} - self.symbols[symbol.file_path][symbol.scope][symbol.name] = symbol + self.symbols[path][symbol.scope][symbol.name] = symbol # Track global exports (top-level functions and classes) if symbol.scope == "global" and symbol.type in ( @@ -78,13 +90,31 @@ def add_symbol(self, symbol: Symbol) -> None: SymbolType.FUNCTION, 
def remove_file(self, file_path: str) -> None:
    """Remove all symbols, exports and metadata associated with a file.

    Args:
        file_path: Path of the file to forget. It is passed through
            ``_normalize_path`` before any lookup, so every spelling that
            normalizes to the stored key matches.

    Entries that are not present are ignored, so calling this for a file
    the table does not know is a no-op.
    """
    norm_path = self._normalize_path(file_path)

    self.symbols.pop(norm_path, None)
    self.file_metadata.pop(norm_path, None)

    # Many exports come from the same file, and each normalization goes
    # through Path.resolve(); normalize every distinct stored path once per
    # call instead of once per exported symbol.
    normalized_by_raw = {}
    stale_names = []
    for name, sym in self.exports.items():
        raw_path = sym.file_path
        if raw_path not in normalized_by_raw:
            normalized_by_raw[raw_path] = self._normalize_path(raw_path)
        if normalized_by_raw[raw_path] == norm_path:
            stale_names.append(name)

    # Delete after the scan: a dict must not change size while iterated.
    for name in stale_names:
        del self.exports[name]
def _has_file_changed(self, file_path: Path, file_path_str: str) -> bool:
    """Report whether a file must be re-analyzed.

    Args:
        file_path: Path used to stat (and, when needed, hash) the file.
        file_path_str: Key under which the file's metadata is stored in
            ``self.symbol_table.file_metadata``.

    Returns:
        ``True`` when no usable metadata is stored for the file, the file
        cannot be stat'ed, its size differs from the stored size, or its
        content hash (or, when no hash was stored, its mtime) differs;
        ``False`` otherwise.
    """
    metadata = self.symbol_table.file_metadata.get(file_path_str)
    if not isinstance(metadata, dict):
        # Unknown file or malformed cache entry: analyze to be safe.
        return True

    try:
        stat = file_path.stat()
    except (OSError, ValueError):
        # Missing/unreadable file or an invalid path: treat as changed.
        return True

    # Cheap check first: a different size always means different content.
    if stat.st_size != metadata.get("size"):
        return True

    stored_hash = metadata.get("sha256")
    if stored_hash:
        # A recorded hash is authoritative. An unreadable file hashes to ""
        # and therefore compares as changed.
        return self._calculate_hash(file_path) != stored_hash

    # No hash recorded: fall back to the modification time.
    return stat.st_mtime != metadata.get("mtime")


def _calculate_hash(self, file_path: Path) -> str:
    """Return the SHA-256 hex digest of a file's content.

    The file is read in fixed-size chunks so that a large file is never
    held in memory in full.

    Args:
        file_path: File to hash.

    Returns:
        The hex digest, or ``""`` when the file cannot be read.
    """
    digest = hashlib.sha256()
    try:
        with file_path.open("rb") as stream:
            # iter(callable, sentinel) stops at the empty read that marks EOF.
            for chunk in iter(lambda: stream.read(65536), b""):
                digest.update(chunk)
    except (OSError, ValueError):
        return ""
    return digest.hexdigest()
file_path.stat() + self.symbol_table.file_metadata[path_str] = { + "mtime": stat.st_mtime, + "size": stat.st_size, + "sha256": self._calculate_hash(file_path), + } + except Exception as e: + logger.warning(f"Failed to update metadata for {path_str}: {e}") + def _analyze_file(self, file_path: Path) -> None: + """Analyze a single file and populate symbols.""" + path_str = file_path.resolve().as_posix() + try: + tree = self.inference_engine.parse_file(path_str) + self._visit_node(tree, path_str, "global") except Exception as e: - logger.warning(f"Failed to build symbol table for {file_path}: {e}") + logger.warning(f"Failed to build symbol table for {path_str}: {e}") def _visit_node(self, node: Any, file_path: str, scope: str) -> None: """Recursive node visitor.""" @@ -154,8 +235,8 @@ def _visit_node(self, node: Any, file_path: str, scope: str) -> None: new_scope = scope - if isinstance(node, (nodes.ClassDef, nodes.FunctionDef)): - # Register the definition itself in the CURRENT scope + # Recognize both FunctionDef and AsyncFunctionDef + if isinstance(node, (nodes.ClassDef, nodes.FunctionDef, nodes.AsyncFunctionDef)): symbol_type = ( SymbolType.CLASS if isinstance(node, nodes.ClassDef) else SymbolType.FUNCTION ) @@ -192,9 +273,8 @@ def _visit_node(self, node: Any, file_path: str, scope: str) -> None: self.symbol_table.add_symbol(symbol) # Recurse children - if hasattr(node, "get_children"): - for child in node.get_children(): - self._visit_node(child, file_path, new_scope) + for child in node.get_children(): + self._visit_node(child, file_path, new_scope) def _save_cache(self) -> None: """Save symbol table to cache.""" @@ -214,6 +294,7 @@ def _save_cache(self) -> None: for f, scopes in self.symbol_table.symbols.items() }, "exports": {n: sym.to_dict() for n, sym in self.symbol_table.exports.items()}, + "file_metadata": self.symbol_table.file_metadata, } with open(cache_file, "w") as f: @@ -238,15 +319,27 @@ def _load_cache(self) -> Optional[SymbolTable]: # Reconstruct 
symbols for f_path, scopes in data.get("symbols", {}).items(): - table.symbols[f_path] = {} + # Normalize path on load just in case + norm_f_path = SymbolTable._normalize_path(f_path) + table.symbols[norm_f_path] = {} for scope_name, names in scopes.items(): - table.symbols[f_path][scope_name] = {} + table.symbols[norm_f_path][scope_name] = {} for name, sym_data in names.items(): - table.symbols[f_path][scope_name][name] = Symbol.from_dict(sym_data) + sym = Symbol.from_dict(sym_data) + sym.file_path = norm_f_path + table.symbols[norm_f_path][scope_name][name] = sym # Reconstruct exports for name, sym_data in data.get("exports", {}).items(): - table.exports[name] = Symbol.from_dict(sym_data) + sym = Symbol.from_dict(sym_data) + sym.file_path = SymbolTable._normalize_path(sym.file_path) + table.exports[name] = sym + + # Reconstruct metadata + file_metadata = data.get("file_metadata", {}) + table.file_metadata = { + SymbolTable._normalize_path(k): v for k, v in file_metadata.items() + } return table diff --git a/refactron/core/inference.py b/refactron/core/inference.py index 99c42f9..7991032 100644 --- a/refactron/core/inference.py +++ b/refactron/core/inference.py @@ -3,6 +3,8 @@ Provides capabilities to infer types, values, and resolve symbols. 
""" +import os +from pathlib import Path from typing import Any, List, Optional import astroid @@ -28,10 +30,67 @@ def parse_string(code: str, module_name: str = "") -> nodes.Module: @staticmethod def parse_file(file_path: str) -> nodes.Module: """Parse a file into an astroid node tree.""" - builder = astroid.builder.AstroidBuilder(astroid.MANAGER) - with open(file_path, "r", encoding="utf-8") as f: - code = f.read() - return builder.string_build(code, modname=file_path) + # Use canonical path (resolved and posix-style for consistency) + abs_path = Path(file_path).resolve().as_posix() + manager = astroid.MANAGER + + # Aggressively clear cache for this file to ensure fresh AST + # Try both resolved and absolute paths to handle symlinks and normalization differences + raw_abs = os.path.abspath(file_path) + manager.astroid_cache.pop(abs_path, None) + manager.astroid_cache.pop(raw_abs, None) + manager.astroid_cache.pop(file_path, None) + + # 2. Find and clear by module name if it exists in caches + file_to_mod = getattr(manager, "file_to_module_cache", {}) + # Some versions use _mod_file_cache + if not file_to_mod: + file_to_mod = getattr(manager, "_mod_file_cache", {}) + + modname = ( + file_to_mod.get(abs_path) or file_to_mod.get(raw_abs) or file_to_mod.get(file_path) + ) + if modname: + manager.astroid_cache.pop(modname, None) + + # 3. Exhaustive search in astroid_cache for any module pointing to this file + for key, val in list(manager.astroid_cache.items()): + if hasattr(val, "file") and val.file: + val_path = Path(val.file).resolve().as_posix() + if val_path == abs_path or val_path == raw_abs.replace("\\", "/"): + manager.astroid_cache.pop(key, None) + + # 4. Clear the mappings themselves + for attr in ("file_to_module_cache", "_mod_file_cache"): + cache = getattr(manager, attr, None) + if isinstance(cache, dict): + cache.pop(abs_path, None) + cache.pop(raw_abs, None) + cache.pop(file_path, None) + + # 5. 
Read file and parse directly to bypass astroid's file cache + try: + with open(abs_path, "r", encoding="utf-8") as f: + code = f.read() + + # Resolve module name to keep astroid's state consistent + modname = "" + try: + from astroid import modutils + + modname = modutils.modname_from_path(abs_path) + except Exception: + pass + + # Use string_build via parse to avoid manager.ast_from_file's internal caching + return astroid.parse(code, module_name=modname, path=abs_path) + except (OSError, UnicodeDecodeError): + # Fallback to manager if manual read fails + try: + return manager.ast_from_file(abs_path) + except Exception as e: + # Fallback for virtual/non-existent files if needed + raise ValueError(f"Failed to parse {abs_path}: {e}") @staticmethod def infer_node(node: nodes.NodeNG, context: Optional[InferenceContext] = None) -> List[Any]: diff --git a/refactron/core/parallel.py b/refactron/core/parallel.py index 2fea0c0..808b6a6 100644 --- a/refactron/core/parallel.py +++ b/refactron/core/parallel.py @@ -7,7 +7,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple from refactron.core.analysis_result import FileAnalysisError -from refactron.core.models import FileMetrics +from refactron.core.models import FileMetrics, AnalysisSkipWarning logger = logging.getLogger(__name__) @@ -58,20 +58,25 @@ def __init__( def process_files( self, files: List[Path], - process_func: Callable[[Path], Tuple[Optional[FileMetrics], Optional[FileAnalysisError]]], + process_func: Callable[ + [Path], + Tuple[ + Optional[FileMetrics], Optional[FileAnalysisError], Optional[AnalysisSkipWarning] + ], + ], progress_callback: Optional[Callable[[int, int], None]] = None, - ) -> Tuple[List[FileMetrics], List[FileAnalysisError]]: + ) -> Tuple[List[FileMetrics], List[FileAnalysisError], List[AnalysisSkipWarning]]: """ Process multiple files in parallel. Args: files: List of file paths to process. process_func: Function to process a single file. 
Should return - (FileMetrics, None) on success or (None, FileAnalysisError) on error. + (FileMetrics, None, skip_warn) on success or (None, FileAnalysisError, None) on error. progress_callback: Optional callback for progress updates (completed, total). Returns: - Tuple of (successful results, failed files). + Tuple of (successful results, failed files, skip warnings). """ if not self.enabled or len(files) <= 1: # Process sequentially if disabled or only one file @@ -86,20 +91,28 @@ def process_files( def _process_sequential( self, files: List[Path], - process_func: Callable[[Path], Tuple[Optional[FileMetrics], Optional[FileAnalysisError]]], + process_func: Callable[ + [Path], + Tuple[ + Optional[FileMetrics], Optional[FileAnalysisError], Optional[AnalysisSkipWarning] + ], + ], progress_callback: Optional[Callable[[int, int], None]] = None, - ) -> Tuple[List[FileMetrics], List[FileAnalysisError]]: + ) -> Tuple[List[FileMetrics], List[FileAnalysisError], List[AnalysisSkipWarning]]: """Process files sequentially.""" results: List[FileMetrics] = [] errors: List[FileAnalysisError] = [] + skips: List[AnalysisSkipWarning] = [] for i, file_path in enumerate(files): try: - result, error = process_func(file_path) + result, error, skip = process_func(file_path) if result is not None: results.append(result) if error is not None: errors.append(error) + if skip is not None: + skips.append(skip) except Exception as e: logger.error(f"Unexpected error processing {file_path}: {e}", exc_info=True) errors.append( @@ -114,17 +127,23 @@ def _process_sequential( if progress_callback: progress_callback(i + 1, len(files)) - return results, errors + return results, errors, skips def _process_parallel_threads( self, files: List[Path], - process_func: Callable[[Path], Tuple[Optional[FileMetrics], Optional[FileAnalysisError]]], + process_func: Callable[ + [Path], + Tuple[ + Optional[FileMetrics], Optional[FileAnalysisError], Optional[AnalysisSkipWarning] + ], + ], progress_callback: 
Optional[Callable[[int, int], None]] = None, - ) -> Tuple[List[FileMetrics], List[FileAnalysisError]]: + ) -> Tuple[List[FileMetrics], List[FileAnalysisError], List[AnalysisSkipWarning]]: """Process files in parallel using threads.""" results: List[FileMetrics] = [] errors: List[FileAnalysisError] = [] + skips: List[AnalysisSkipWarning] = [] completed = 0 with ThreadPoolExecutor(max_workers=self.max_workers) as executor: @@ -139,11 +158,13 @@ def _process_parallel_threads( completed += 1 try: - result, error = future.result() + result, error, skip = future.result() if result is not None: results.append(result) if error is not None: errors.append(error) + if skip is not None: + skips.append(skip) except Exception as e: logger.error(f"Unexpected error processing {file_path}: {e}", exc_info=True) recovery_msg = "Check the file for syntax errors or encoding issues" @@ -159,14 +180,19 @@ def _process_parallel_threads( if progress_callback: progress_callback(completed, len(files)) - return results, errors + return results, errors, skips def _process_parallel_processes( self, files: List[Path], - process_func: Callable[[Path], Tuple[Optional[FileMetrics], Optional[FileAnalysisError]]], + process_func: Callable[ + [Path], + Tuple[ + Optional[FileMetrics], Optional[FileAnalysisError], Optional[AnalysisSkipWarning] + ], + ], progress_callback: Optional[Callable[[int, int], None]] = None, - ) -> Tuple[List[FileMetrics], List[FileAnalysisError]]: + ) -> Tuple[List[FileMetrics], List[FileAnalysisError], List[AnalysisSkipWarning]]: """ Process files in parallel using processes. 
@@ -176,6 +202,7 @@ def _process_parallel_processes( """ results: List[FileMetrics] = [] errors: List[FileAnalysisError] = [] + skips: List[AnalysisSkipWarning] = [] completed = 0 try: @@ -191,11 +218,13 @@ def _process_parallel_processes( completed += 1 try: - result, error = future.result() + result, error, skip = future.result() if result is not None: results.append(result) if error is not None: errors.append(error) + if skip is not None: + skips.append(skip) except Exception as e: logger.error(f"Unexpected error processing {file_path}: {e}", exc_info=True) recovery_msg = "Check the file for syntax errors or encoding issues" @@ -216,7 +245,7 @@ def _process_parallel_processes( logger.info("Falling back to sequential processing") return self._process_sequential(files, process_func, progress_callback) - return results, errors + return results, errors, skips def get_config(self) -> Dict[str, Any]: """ diff --git a/refactron/core/refactron.py b/refactron/core/refactron.py index 573004b..25f4d92 100644 --- a/refactron/core/refactron.py +++ b/refactron/core/refactron.py @@ -267,17 +267,18 @@ def analyze(self, target: Union[str, Path]) -> AnalysisResult: # Create a wrapper function for parallel processing def process_file_wrapper( file_path: Path, - ) -> Tuple[Optional[FileMetrics], Optional[FileAnalysisError]]: + ) -> Tuple[ + Optional[FileMetrics], Optional[FileAnalysisError], Optional[AnalysisSkipWarning] + ]: try: file_metrics, skip_warn = self._analyze_file(file_path) - if skip_warn is not None: - result.semantic_skip_warnings.append(skip_warn) + # Warnings are collected by ParallelProcessor.process_files return # Update incremental tracker if self.incremental_tracker.enabled: self.incremental_tracker.update_file_state(file_path) - return file_metrics, None + return file_metrics, None, skip_warn except AnalysisError as e: logger.debug(f"Failed to analyze {file_path}: {e}") error = FileAnalysisError( @@ -286,7 +287,7 @@ def process_file_wrapper( 
error_type=e.__class__.__name__, recovery_suggestion=e.recovery_suggestion, ) - return None, error + return None, error, None except Exception as e: logger.error(f"Unexpected error analyzing {file_path}: {e}", exc_info=True) error = FileAnalysisError( @@ -295,16 +296,17 @@ def process_file_wrapper( error_type=e.__class__.__name__, recovery_suggestion="Check the file for syntax errors or encoding issues", ) - return None, error + return None, error, None # Process files in parallel - file_metrics_list, error_list = self.parallel_processor.process_files( + file_metrics_list, error_list, skip_warnings = self.parallel_processor.process_files( files, process_file_wrapper, ) result.file_metrics.extend(file_metrics_list) result.failed_files.extend(error_list) + result.semantic_skip_warnings.extend(skip_warnings) result.total_issues = sum(fm.issue_count for fm in file_metrics_list) else: # Sequential processing @@ -331,13 +333,12 @@ def process_file_wrapper( ) except Exception as e: logger.error(f"Unexpected error analyzing {file_path}: {e}", exc_info=True) - recovery_msg = "Check the file for syntax errors or encoding issues" result.failed_files.append( FileAnalysisError( file_path=file_path, error_message=str(e), error_type=e.__class__.__name__, - recovery_suggestion=recovery_msg, + recovery_suggestion="Check the file for syntax errors or encoding issues", ) ) @@ -346,7 +347,7 @@ def process_file_wrapper( skipped_count = len(result.semantic_skip_warnings) if total_analyzed > 0 and skipped_count / total_analyzed > 0.10: result.semantic_skip_summary = ( - f"⚠ Semantic analysis (taint) was skipped for {skipped_count} of " + f"(warning) Semantic analysis (taint) was skipped for {skipped_count} of " f"{total_analyzed} files ({skipped_count / total_analyzed * 100:.0f}%). " "Check logs for details. Common causes: unsupported syntax or very large files." 
) @@ -371,7 +372,7 @@ def process_file_wrapper( analyzers_used=analyzer_names, ) - # End memory profiling + # Final memory snapshot if self.memory_profiler.enabled: self.memory_profiler.snapshot("analysis_end") diff = self.memory_profiler.compare("analysis_start", "analysis_end") @@ -849,4 +850,4 @@ def clear_caches(self) -> None: self.ast_cache.clear() self.incremental_tracker.clear() self.memory_profiler.clear_snapshots() - logger.info("Caches cleared successfully") + logger.info("Caches cleared successfully") \ No newline at end of file diff --git a/tests/test_config_management.py b/tests/test_config_management.py index c71b71a..50afda8 100644 --- a/tests/test_config_management.py +++ b/tests/test_config_management.py @@ -903,16 +903,16 @@ def test_parallel_processor_sequential_and_thread_modes(tmp_path: Path) -> None: def process_func(p: Path): if p.name == "bad.py": raise ValueError("boom") - return None, None + return None, None, None p_seq = ParallelProcessor(max_workers=1, use_processes=False, enabled=True) - _, errors = p_seq.process_files(files, process_func) + _, errors, skips = p_seq.process_files(files, process_func) assert p_seq.enabled is False assert len(errors) == 1 assert isinstance(errors[0], FileAnalysisError) p_thr = ParallelProcessor(max_workers=2, use_processes=False, enabled=True) - results, errors = p_thr.process_files(files, lambda p: (None, None)) + results, errors, skips = p_thr.process_files(files, lambda p: (None, None, None)) assert results == [] assert errors == [] assert p_thr.get_config()["max_workers"] == 2 @@ -1171,11 +1171,11 @@ def make_error(path): def success_func(p): - return make_metrics(p), None + return make_metrics(p), None, None def error_func(p): - return None, make_error(p) + return None, make_error(p), None def raises_func(p): @@ -1210,23 +1210,23 @@ def test_get_config(self): class TestSequentialProcessing: def test_empty_files(self): pp = ParallelProcessor(enabled=False) - results, errors = pp.process_files([], 
success_func) - assert results == [] and errors == [] + results, errors, skips = pp.process_files([], success_func) + assert results == [] and errors == [] and skips == [] def test_single_file_success(self): pp = ParallelProcessor(enabled=False) files = [Path("a.py")] - results, errors = pp.process_files(files, success_func) - assert len(results) == 1 and len(errors) == 0 + results, errors, skips = pp.process_files(files, success_func) + assert len(results) == 1 and len(errors) == 0 and len(skips) == 0 def test_single_file_error(self): pp = ParallelProcessor(enabled=False) - results, errors = pp.process_files([Path("a.py")], error_func) - assert len(results) == 0 and len(errors) == 1 + results, errors, skips = pp.process_files([Path("a.py")], error_func) + assert len(results) == 0 and len(errors) == 1 and len(skips) == 0 def test_single_file_exception(self): pp = ParallelProcessor(enabled=False) - results, errors = pp.process_files([Path("a.py")], raises_func) + results, errors, skips = pp.process_files([Path("a.py")], raises_func) assert len(errors) == 1 def test_progress_callback(self): @@ -1244,12 +1244,13 @@ class TestThreadedProcessing: def test_two_files_threads(self): pp = ParallelProcessor(max_workers=2, use_processes=False, enabled=True) files = [Path("a.py"), Path("b.py")] - results, errors = pp.process_files(files, success_func) + results, errors, skips = pp.process_files(files, success_func) assert len(results) == 2 + assert len(skips) == 0 def test_thread_error_handling(self): pp = ParallelProcessor(max_workers=2, use_processes=False, enabled=True) - results, errors = pp.process_files([Path("a.py"), Path("b.py")], raises_func) + results, errors, skips = pp.process_files([Path("a.py"), Path("b.py")], raises_func) assert len(errors) == 2 def test_thread_progress_callback(self): @@ -1264,7 +1265,7 @@ def test_thread_progress_callback(self): def test_single_file_goes_sequential(self): pp = ParallelProcessor(max_workers=4, use_processes=False, enabled=True) 
- results, errors = pp.process_files([Path("a.py")], success_func) + results, errors, skips = pp.process_files([Path("a.py")], success_func) assert len(results) == 1 @@ -1274,13 +1275,13 @@ def test_process_pool_falls_back_on_exception(self): with patch( "refactron.core.parallel.ProcessPoolExecutor", side_effect=Exception("spawn fail") ): - results, errors = pp.process_files([Path("a.py")], success_func) + results, errors, skips = pp.process_files([Path("a.py")], success_func) assert len(results) == 1 def test_process_pool_success(self): pp = ParallelProcessor(max_workers=2, use_processes=True, enabled=True) mock_future = MagicMock() - mock_future.result.return_value = (make_metrics(Path("a.py")), None) + mock_future.result.return_value = (make_metrics(Path("a.py")), None, None) mock_exec = MagicMock() mock_exec.__enter__ = lambda s: s mock_exec.__exit__ = MagicMock(return_value=False) diff --git a/tests/test_performance_optimization.py b/tests/test_performance_optimization.py index 0b66afc..ca60500 100644 --- a/tests/test_performance_optimization.py +++ b/tests/test_performance_optimization.py @@ -242,11 +242,12 @@ def test_sequential_processing(self): def process_func(file_path): # Simulate processing - return None, None + return None, None, None - results, errors = processor.process_files(files, process_func) + results, errors, skips = processor.process_files(files, process_func) assert len(results) == 0 # All return None assert len(errors) == 0 + assert len(skips) == 0 class TestMemoryProfiler: diff --git a/tests/test_symbol_table_incremental.py b/tests/test_symbol_table_incremental.py new file mode 100644 index 0000000..1291f27 --- /dev/null +++ b/tests/test_symbol_table_incremental.py @@ -0,0 +1,122 @@ +import json +import time +from pathlib import Path +from refactron.analysis.symbol_table import SymbolTableBuilder, SymbolType + + +def test_symbol_table_incremental_build(tmp_path): + project_root = tmp_path / "project" + project_root.mkdir() + + file1 = 
project_root / "module1.py" + file1.write_text("def func1(): pass\nclass Class1: pass") + + cache_dir = tmp_path / "cache" + builder = SymbolTableBuilder(cache_dir=cache_dir) + + # 1. First build + table = builder.build_for_project(project_root) + assert "func1" in table.exports + assert "Class1" in table.exports + + cache_file = cache_dir / "symbols.json" + assert cache_file.exists() + + with open(cache_file, "r") as f: + cache_data = json.load(f) + assert file1.resolve().as_posix() in cache_data["file_metadata"] + + # 2. Second build (no change) + # We'll monkeypatch _analyze_file to verify it's not called + original_analyze = builder._analyze_file + analyze_called = [] + + def mocked_analyze(path): + analyze_called.append(path) + return original_analyze(path) + + builder._analyze_file = mocked_analyze + table2 = builder.build_for_project(project_root) + + assert len(analyze_called) == 0 + assert "func1" in table2.exports + + # 3. Modify file (incremental update) + time.sleep(0.1) # Ensure mtime changes + file1.write_text("def func1_v2(): pass\nclass Class1: pass") + + analyze_called.clear() + table3 = builder.build_for_project(project_root) + + assert len(analyze_called) == 1 + assert "func1_v2" in table3.exports + assert "func1" not in table3.exports + assert "Class1" in table3.exports + + # 4. Add new file + file2 = project_root / "module2.py" + file2.write_text("var2 = 42") + + analyze_called.clear() + table4 = builder.build_for_project(project_root) + + assert len(analyze_called) == 1 + assert file2.resolve().as_posix() in [p.as_posix() for p in analyze_called] + assert "var2" in table4.exports + + # 5. 
def test_symbol_table_hash_validation(tmp_path):
    """Verify that a content change triggers re-analysis even when the
    file's size and mtime are unchanged."""
    import os

    project_root = tmp_path / "project"
    project_root.mkdir()

    file1 = project_root / "module1.py"
    file1.write_text("x = 1")

    cache_dir = tmp_path / "cache"
    builder = SymbolTableBuilder(cache_dir=cache_dir)

    # Initial build
    builder.build_for_project(project_root)
    original_stat = file1.stat()

    # Replace the content with text of the same length, then put the
    # original timestamps back. The integer-nanosecond API is used because
    # a round trip through the float st_mtime can lose precision and make
    # the equality check below flaky.
    file1.write_text("y = 2")  # same size: "x = 1" vs "y = 2"
    os.utime(file1, ns=(original_stat.st_atime_ns, original_stat.st_mtime_ns))

    # Precondition: mtime and size match, so only the hash can differ.
    restored_stat = file1.stat()
    assert restored_stat.st_mtime_ns == original_stat.st_mtime_ns
    assert restored_stat.st_size == original_stat.st_size

    # Wrap _analyze_file to record which files get re-analyzed.
    analyze_called = []
    original_analyze = builder._analyze_file

    def mocked_analyze(path):
        analyze_called.append(path)
        return original_analyze(path)

    builder._analyze_file = mocked_analyze

    table = builder.build_for_project(project_root)

    # Should detect change via hash
    assert len(analyze_called) == 1
    assert "y" in table.exports
    assert "x" not in table.exports