159 changes: 126 additions & 33 deletions refactron/analysis/symbol_table.py
@@ -3,12 +3,13 @@
Maps classes, functions, variables, and their relationships across the codebase.
"""

import hashlib
import json
import logging
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Set

from refactron.core.inference import InferenceEngine

@@ -61,30 +62,59 @@ class SymbolTable:
symbols: Dict[str, Dict[str, Dict[str, Symbol]]] = field(default_factory=dict)
# Map: global_name -> Symbol (for easy cross-file lookup of exports)
exports: Dict[str, Symbol] = field(default_factory=dict)
# Map: file_path -> { "mtime": float, "size": int, "sha256": str }
file_metadata: Dict[str, Dict[str, Any]] = field(default_factory=dict)

Comment on lines +65 to 67

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Verify Any-typed metadata declaration
rg -n 'file_metadata:\s*Dict\[str,\s*Dict\[str,\s*Any\]\]' refactron/analysis/symbol_table.py

# Verify bool-returning function currently returns expressions based on Any lookups
rg -n 'return current_hash != stored_hash|return stat\.st_mtime != metadata\.get\("mtime"\)' \
  refactron/analysis/symbol_table.py

Repository: Refactron-ai/Refactron_lib



🏁 Script executed:

# View the import statements and file_metadata declaration (around line 66)
head -70 refactron/analysis/symbol_table.py | tail -25



🏁 Script executed:

# View the function containing the problematic return statements (around lines 176-189)
sed -n '170,195p' refactron/analysis/symbol_table.py



🏁 Script executed:

# Check Python version targets and type annotations in the file
head -20 refactron/analysis/symbol_table.py



🏁 Script executed:

# Check if TypedDict is already imported or if we need typing_extensions
grep -n "TypedDict\|typing_extensions" refactron/analysis/symbol_table.py



🏁 Script executed:

# Check Python version targets in project configuration
fd -e pyproject.toml -e setup.py -e setup.cfg | head -3 | xargs grep -l "python_requires\|target-version" 2>/dev/null | head -1 | xargs cat | grep -A5 -B5 "python\|version" | head -30



🏁 Script executed:

# Find and read pyproject.toml to check Python version targets
find . -name "pyproject.toml" -type f | head -1 | xargs cat | grep -A10 "python" | head -15



🏁 Script executed:

# Also verify the complete context of the problematic lines one more time with broader view
sed -n '176,195p' refactron/analysis/symbol_table.py



Tighten file_metadata typing to resolve mypy no-any-return errors.

Dict[str, Dict[str, Any]] types metadata values as Any, causing comparisons at lines 186 and 189 to return Any instead of bool, violating the function's return type annotation.

Suggested fix
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, TypedDict
+
+
+class FileMetadata(TypedDict, total=False):
+    mtime: float
+    size: int
+    sha256: str
@@
-    file_metadata: Dict[str, Dict[str, Any]] = field(default_factory=dict)
+    file_metadata: Dict[str, FileMetadata] = field(default_factory=dict)
@@
-        metadata = self.symbol_table.file_metadata[file_path_str]
+        metadata: FileMetadata = self.symbol_table.file_metadata[file_path_str]
@@
-            stored_hash = metadata.get("sha256")
-            if stored_hash:
+            stored_hash = metadata.get("sha256")
+            if isinstance(stored_hash, str) and stored_hash:
                 current_hash = self._calculate_hash(file_path)
-                return current_hash != stored_hash
+                return bool(current_hash != stored_hash)
@@
-            return stat.st_mtime != metadata.get("mtime")
+            stored_mtime = metadata.get("mtime")
+            return bool(stored_mtime is None or stat.st_mtime != stored_mtime)

Required to satisfy mypy disallow_untyped_defs in refactron/ per coding guidelines.
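
The suggested change can be sketched in isolation. Note this is a hypothetical, self-contained example mirroring the review's suggestion (names like `has_changed` and the sample `file_metadata` dict are illustrative, not the actual refactron code): with a `TypedDict`, `.get()` returns a concrete type, so the comparisons below are `bool` under mypy rather than `Any`.

```python
from typing import Dict, Optional, TypedDict


class FileMetadata(TypedDict, total=False):
    mtime: float
    size: int
    sha256: str


# Sample data standing in for SymbolTable.file_metadata
file_metadata: Dict[str, FileMetadata] = {
    "a.py": {"mtime": 1.0, "size": 10, "sha256": "abc"}
}


def has_changed(path: str, current_hash: str, current_mtime: float) -> bool:
    meta: Optional[FileMetadata] = file_metadata.get(path)
    if meta is None:
        return True
    stored_hash = meta.get("sha256")
    if stored_hash:
        # stored_hash is str under the TypedDict, so this comparison is bool
        return current_hash != stored_hash
    stored_mtime = meta.get("mtime")
    return stored_mtime is None or current_mtime != stored_mtime


print(has_changed("a.py", "abc", 1.0))  # False: hash matches
print(has_changed("a.py", "xyz", 1.0))  # True: hash differs
print(has_changed("missing.py", "abc", 1.0))  # True: no metadata recorded
```

Since `total=False` keys may be absent, `.get()` still yields `Optional[...]`, which is why the sketch checks truthiness and `None` explicitly before comparing.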

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@refactron/analysis/symbol_table.py` around lines 65-67, define a concrete
TypedDict (e.g., FileMetadata with keys "mtime": float, "size": int, "sha256":
str) and replace the loose type Dict[str, Dict[str, Any]] on file_metadata with
Dict[str, FileMetadata]; update any imports (from typing import TypedDict) and
usages so comparisons that read file_metadata[...] .get("mtime"/"size"/"sha256")
yield the correct types (float/int/str) instead of Any, ensuring functions in
symbol_table.py that compare metadata values (references to the file_metadata
variable) now return proper bools for mypy.

@staticmethod
def _normalize_path(path: str) -> str:
"""Standardize path format for consistent keys/storage."""
return Path(path).resolve().as_posix()

def add_symbol(self, symbol: Symbol) -> None:
"""Add a symbol to the table."""
if symbol.file_path not in self.symbols:
self.symbols[symbol.file_path] = {}
path = self._normalize_path(symbol.file_path)
# Ensure the symbol itself stores the normalized path
symbol.file_path = path

if path not in self.symbols:
self.symbols[path] = {}

if symbol.scope not in self.symbols[symbol.file_path]:
self.symbols[symbol.file_path][symbol.scope] = {}
if symbol.scope not in self.symbols[path]:
self.symbols[path][symbol.scope] = {}

self.symbols[symbol.file_path][symbol.scope][symbol.name] = symbol
self.symbols[path][symbol.scope][symbol.name] = symbol

# Track global exports (top-level functions and classes)
if symbol.scope == "global" and symbol.type in (
SymbolType.CLASS,
SymbolType.FUNCTION,
SymbolType.VARIABLE,
):
# Key by module path + name? Or just name for now?
# Using simple name collision strategy for MVP
self.exports[symbol.name] = symbol

def remove_file(self, file_path: str) -> None:
"""Remove all symbols and metadata associated with a file."""
norm_path = self._normalize_path(file_path)

if norm_path in self.symbols:
del self.symbols[norm_path]

# Remove from exports
names_to_remove = [
name
for name, sym in self.exports.items()
if self._normalize_path(sym.file_path) == norm_path
]
for name in names_to_remove:
self.exports.pop(name, None)

if norm_path in self.file_metadata:
del self.file_metadata[norm_path]

def get_symbol(self, file_path: str, name: str, scope: str = "global") -> Optional[Symbol]:
"""Retrieve a symbol."""
return self.symbols.get(file_path, {}).get(scope, {}).get(name)
norm_path = self._normalize_path(file_path)
return self.symbols.get(norm_path, {}).get(scope, {}).get(name)

def resolve_reference(
self, name: str, current_file: str, current_scope: str
@@ -106,8 +136,7 @@ def resolve_reference(
if file_global:
return file_global

# 3. Cross-file exports (Naive implementation)
# TODO: Enhance this with proper import resolution
# 3. Cross-file exports
return self.exports.get(name)


@@ -120,42 +149,94 @@ def __init__(self, cache_dir: Optional[Path] = None):
self.inference_engine = InferenceEngine()

def build_for_project(self, project_root: Path) -> SymbolTable:
"""Scan project and build symbol table."""
"""Scan project and build symbol table incrementally."""
if self.cache_dir:
cached = self._load_cache()
if cached:
# TODO: Implement incremental update logic here
return cached
cached_table = self._load_cache()
if cached_table:
self.symbol_table = cached_table

python_files = list(project_root.rglob("*.py"))
current_file_paths = {fp.resolve().as_posix() for fp in python_files}

# 1. Remove deleted files
cached_files = list(self.symbol_table.file_metadata.keys())
for cached_path in cached_files:
if cached_path not in current_file_paths:
logger.debug(f"Removing deleted file from symbol table: {cached_path}")
self.symbol_table.remove_file(cached_path)

Comment on lines +161 to +167

⚠️ Potential issue | 🟠 Major

Deleted-file cleanup misses stale symbols when an old cache lacks file_metadata.

Line 151 derives cached_files only from self.symbol_table.file_metadata. If an older cache has symbols but no
metadata, deleted files are never purged and stale exports can persist.

Suggested fix
-        cached_files = list(self.symbol_table.file_metadata.keys())
+        cached_files = set(self.symbol_table.file_metadata.keys()) | set(
+            self.symbol_table.symbols.keys()
+        )
         for cached_path in cached_files:
             if cached_path not in current_file_paths:
                 logger.debug(f"Removing deleted file from symbol table: {cached_path}")
                 self.symbol_table.remove_file(cached_path)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@refactron/analysis/symbol_table.py` around lines 150-156, the deletion
cleanup currently uses only self.symbol_table.file_metadata to build
cached_files, so when an older cache has symbols but no file_metadata deleted
files aren't found; update the logic in the cleanup loop to compute cached_files
by first using self.symbol_table.file_metadata if present and non-empty,
otherwise fall back to deriving file paths from other symbol stores on the
symbol_table (e.g., keys from self.symbol_table.exported_symbols or any
self.symbol_table.files / self.symbol_table.symbols mapping), then proceed to
call self.symbol_table.remove_file(cached_path) for paths not in
current_file_paths; ensure you reference the existing symbols (exported_symbols,
remove_file, file_metadata) to locate the right structures.
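
A minimal standalone sketch of the suggested fallback, using plain dicts in place of the real `SymbolTable` (all names here are illustrative): deriving the cached-path set from both the metadata and the symbols mapping purges stale files even when an older cache carries no `file_metadata`.

```python
# Stand-ins for SymbolTable.symbols and SymbolTable.file_metadata.
# Older cache: symbols are present, but metadata was never recorded.
cached_symbols = {"old.py": {"global": {}}, "kept.py": {"global": {}}}
cached_metadata: dict = {}

# Files found on disk during the current scan
current_file_paths = {"kept.py"}

# Union of both key sets, so "old.py" is discovered despite missing metadata
cached_files = set(cached_metadata) | set(cached_symbols)
for path in cached_files:
    if path not in current_file_paths:
        # Mirrors SymbolTable.remove_file: drop symbols and metadata together
        cached_symbols.pop(path, None)
        cached_metadata.pop(path, None)

print(sorted(cached_symbols))  # ['kept.py']
```

With the original metadata-only iteration, `old.py` would never appear in `cached_files` and its symbols (and any exports pointing at it) would survive the cleanup.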

# 2. Analyze new or modified files
for file_path in python_files:
self._analyze_file(file_path)
abs_path = file_path.resolve()
path_str = abs_path.as_posix()
if self._has_file_changed(abs_path, path_str):
logger.debug(f"Analyzing changed file: {path_str}")
self.symbol_table.remove_file(path_str)
self._analyze_file(abs_path)
self._update_file_metadata(abs_path, path_str)

if self.cache_dir:
self._save_cache()

return self.symbol_table

def _analyze_file(self, file_path: Path) -> None:
"""Analyze a single file and populate symbols."""
def _has_file_changed(self, file_path: Path, file_path_str: str) -> bool:
"""Check if file has changed since last analysis."""
if file_path_str not in self.symbol_table.file_metadata:
return True

metadata = self.symbol_table.file_metadata[file_path_str]
try:
stat = file_path.stat()
if stat.st_size != metadata.get("size"):
return True

# Authoritative check: compare SHA-256 hashes
stored_hash = metadata.get("sha256")
if stored_hash:
current_hash = self._calculate_hash(file_path)
return current_hash != stored_hash

return stat.st_mtime != metadata.get("mtime")
except Exception:
return True

def _calculate_hash(self, file_path: Path) -> str:
"""Calculate SHA-256 hash of file content."""
try:
# We use astroid for better inference capabilities later
tree = self.inference_engine.parse_file(str(file_path))
return hashlib.sha256(file_path.read_bytes()).hexdigest()
except Exception:
return ""
Comment on lines +204 to +209

⚠️ Potential issue | 🟠 Major

Do not persist empty SHA-256 values; doing so weakens rollback-safe invalidation.

If _calculate_hash() fails, it returns "", and that value is stored in metadata. On later runs this disables the
hash path and falls back to mtime, which can miss changes after timestamp rollback.

Suggested fix
-def _calculate_hash(self, file_path: Path) -> str:
+def _calculate_hash(self, file_path: Path) -> Optional[str]:
@@
-        except Exception:
-            return ""
+        except OSError as e:
+            logger.warning(f"Failed to hash {file_path}: {e}")
+            return None
@@
-            self.symbol_table.file_metadata[file_path_str] = {
+            file_hash = self._calculate_hash(file_path)
+            if not file_hash:
+                self.symbol_table.file_metadata.pop(file_path_str, None)
+                return
+            self.symbol_table.file_metadata[file_path_str] = {
                 "mtime": stat.st_mtime,
                 "size": stat.st_size,
-                "sha256": self._calculate_hash(file_path),
+                "sha256": file_hash,
             }

Also applies to: 206-210

🧰 Tools
🪛 Ruff (0.15.9)

[warning] 199-199: Do not catch blind exception: Exception

(BLE001)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@refactron/analysis/symbol_table.py` around lines 195-200, the
_calculate_hash function currently returns an empty string on exception which
gets persisted and weakens rollback-safe invalidation; change its signature to
return Optional[str] (or None on failure) instead of "", log or capture the
underlying exception, and ensure callers (the code that stores metadata) treat
None as "no hash available" and skip persisting any hash value; apply the same
change to the other similar try/except block referenced (lines 206-210) so no
empty string hash is ever written to metadata.
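
A hedged sketch of the Optional-return approach described above (self-contained, with illustrative names; not the actual refactron code): hashing failures surface as `None`, and the caller skips persisting metadata rather than storing `""`.

```python
import hashlib
import tempfile
from pathlib import Path
from typing import Dict, Optional


def calculate_hash(file_path: Path) -> Optional[str]:
    """Return the SHA-256 hex digest, or None when the file is unreadable."""
    try:
        return hashlib.sha256(file_path.read_bytes()).hexdigest()
    except OSError:
        return None  # signal "no hash available" instead of persisting ""


metadata: Dict[str, Dict[str, str]] = {}

# Unreadable path: nothing is persisted, so later runs re-analyze the file
# instead of silently falling back to an mtime-only comparison.
missing = Path("no_such_file.py")
h = calculate_hash(missing)
if h is not None:
    metadata[str(missing)] = {"sha256": h}

# Readable file: a real digest is stored.
with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as f:
    f.write(b"x = 1\n")
real = Path(f.name)
h = calculate_hash(real)
if h is not None:
    metadata[str(real)] = {"sha256": h}

print(str(missing) in metadata)  # False
print(len(metadata[str(real)]["sha256"]))  # 64 hex characters
```

Catching `OSError` rather than a blind `Exception` also addresses the Ruff BLE001 warning flagged above.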


# Walk the tree
self._visit_node(tree, str(file_path), "global")
def _update_file_metadata(self, file_path: Path, path_str: str) -> None:
"""Update file metadata in symbol table."""
try:
stat = file_path.stat()
self.symbol_table.file_metadata[path_str] = {
"mtime": stat.st_mtime,
"size": stat.st_size,
"sha256": self._calculate_hash(file_path),
}
except Exception as e:
logger.warning(f"Failed to update metadata for {path_str}: {e}")

def _analyze_file(self, file_path: Path) -> None:
"""Analyze a single file and populate symbols."""
path_str = file_path.resolve().as_posix()
try:
tree = self.inference_engine.parse_file(path_str)
self._visit_node(tree, path_str, "global")
except Exception as e:
logger.warning(f"Failed to build symbol table for {file_path}: {e}")
logger.warning(f"Failed to build symbol table for {path_str}: {e}")

def _visit_node(self, node: Any, file_path: str, scope: str) -> None:
"""Recursive node visitor."""
import astroid.nodes as nodes

new_scope = scope

if isinstance(node, (nodes.ClassDef, nodes.FunctionDef)):
# Register the definition itself in the CURRENT scope
# Recognize both FunctionDef and AsyncFunctionDef
if isinstance(node, (nodes.ClassDef, nodes.FunctionDef, nodes.AsyncFunctionDef)):
symbol_type = (
SymbolType.CLASS if isinstance(node, nodes.ClassDef) else SymbolType.FUNCTION
)
@@ -192,9 +273,8 @@ def _visit_node(self, node: Any, file_path: str, scope: str) -> None:
self.symbol_table.add_symbol(symbol)

# Recurse children
if hasattr(node, "get_children"):
for child in node.get_children():
self._visit_node(child, file_path, new_scope)
for child in node.get_children():
self._visit_node(child, file_path, new_scope)

def _save_cache(self) -> None:
"""Save symbol table to cache."""
@@ -214,6 +294,7 @@ def _save_cache(self) -> None:
for f, scopes in self.symbol_table.symbols.items()
},
"exports": {n: sym.to_dict() for n, sym in self.symbol_table.exports.items()},
"file_metadata": self.symbol_table.file_metadata,
}

with open(cache_file, "w") as f:
@@ -238,15 +319,27 @@ def _load_cache(self) -> Optional[SymbolTable]:

# Reconstruct symbols
for f_path, scopes in data.get("symbols", {}).items():
table.symbols[f_path] = {}
# Normalize path on load just in case
norm_f_path = SymbolTable._normalize_path(f_path)
table.symbols[norm_f_path] = {}
for scope_name, names in scopes.items():
table.symbols[f_path][scope_name] = {}
table.symbols[norm_f_path][scope_name] = {}
for name, sym_data in names.items():
table.symbols[f_path][scope_name][name] = Symbol.from_dict(sym_data)
sym = Symbol.from_dict(sym_data)
sym.file_path = norm_f_path
table.symbols[norm_f_path][scope_name][name] = sym

# Reconstruct exports
for name, sym_data in data.get("exports", {}).items():
table.exports[name] = Symbol.from_dict(sym_data)
sym = Symbol.from_dict(sym_data)
sym.file_path = SymbolTable._normalize_path(sym.file_path)
table.exports[name] = sym

# Reconstruct metadata
file_metadata = data.get("file_metadata", {})
table.file_metadata = {
SymbolTable._normalize_path(k): v for k, v in file_metadata.items()
}

return table

67 changes: 63 additions & 4 deletions refactron/core/inference.py
@@ -3,6 +3,8 @@
Provides capabilities to infer types, values, and resolve symbols.
"""

import os
from pathlib import Path
from typing import Any, List, Optional

import astroid
@@ -28,10 +30,67 @@ def parse_string(code: str, module_name: str = "") -> nodes.Module:
@staticmethod
def parse_file(file_path: str) -> nodes.Module:
"""Parse a file into an astroid node tree."""
builder = astroid.builder.AstroidBuilder(astroid.MANAGER)
with open(file_path, "r", encoding="utf-8") as f:
code = f.read()
return builder.string_build(code, modname=file_path)
# Use canonical path (resolved and posix-style for consistency)
abs_path = Path(file_path).resolve().as_posix()
manager = astroid.MANAGER

# Aggressively clear cache for this file to ensure fresh AST
# Try both resolved and absolute paths to handle symlinks and normalization differences
raw_abs = os.path.abspath(file_path)
manager.astroid_cache.pop(abs_path, None)
manager.astroid_cache.pop(raw_abs, None)
manager.astroid_cache.pop(file_path, None)

# 2. Find and clear by module name if it exists in caches
file_to_mod = getattr(manager, "file_to_module_cache", {})
# Some versions use _mod_file_cache
if not file_to_mod:
file_to_mod = getattr(manager, "_mod_file_cache", {})

modname = (
file_to_mod.get(abs_path) or file_to_mod.get(raw_abs) or file_to_mod.get(file_path)
)
if modname:
manager.astroid_cache.pop(modname, None)

# 3. Exhaustive search in astroid_cache for any module pointing to this file
for key, val in list(manager.astroid_cache.items()):
if hasattr(val, "file") and val.file:
val_path = Path(val.file).resolve().as_posix()
if val_path == abs_path or val_path == raw_abs.replace("\\", "/"):
manager.astroid_cache.pop(key, None)

# 4. Clear the mappings themselves
for attr in ("file_to_module_cache", "_mod_file_cache"):
cache = getattr(manager, attr, None)
if isinstance(cache, dict):
cache.pop(abs_path, None)
cache.pop(raw_abs, None)
cache.pop(file_path, None)

# 5. Read file and parse directly to bypass astroid's file cache
try:
with open(abs_path, "r", encoding="utf-8") as f:
code = f.read()

# Resolve module name to keep astroid's state consistent
modname = ""
try:
from astroid import modutils

modname = modutils.modname_from_path(abs_path)
except Exception:
pass

# Use string_build via parse to avoid manager.ast_from_file's internal caching
return astroid.parse(code, module_name=modname, path=abs_path)
Comment on lines +77 to +86

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

find . -type f -name "inference.py" | head -20



🏁 Script executed:

# Look for refactron directory structure
fd -type f "inference\.py" | grep refactron



🏁 Script executed:

# Get the file location and content
ls -la refactron/core/inference.py 2>/dev/null && wc -l refactron/core/inference.py



🏁 Script executed:

cat -n refactron/core/inference.py | sed -n '65,100p'



🏁 Script executed:

# Also check the full function signature and any related context
cat -n refactron/core/inference.py | sed -n '1,90p'



🏁 Script executed:

# Check if BackupManager is imported or used in this file
grep -n "BackupManager\|validate_backup_integrity" refactron/core/inference.py



🏁 Script executed:

# Check the broader context - what calls parse_file and expects?
rg -n "parse_file" --type py



🏁 Script executed:

# Look at modutils to understand what modname_from_path returns
rg -A 5 "modutils.modname_from_path" --type py



🏁 Script executed:

# Check how symbol_table.py uses the result from parse_file()
cat -n refactron/analysis/symbol_table.py | sed -n '240,270p'



🏁 Script executed:

# Look at the test cases to understand expected behavior
cat -n tests/test_rag_parser.py | sed -n '84,110p'



🏁 Script executed:

# Check if there are any files with relative imports that would be problematic
rg "from \.\." --type py | head -20



🏁 Script executed:

# Search for any relative import issues, bugs, or TODOs related to module_name
rg "relative import|package context" --type py -i



🏁 Script executed:

# Check for any test failures or issues related to module_name or modutils
rg "modname|module_name" refactron/core/inference.py -B 2 -A 2



🏁 Script executed:

# Look for git history or comments about this change
git log --oneline -20 -- refactron/core/inference.py 2>/dev/null || echo "Git history unavailable"



🏁 Script executed:

# Check if there are any conftest or fixture files that test relative imports
rg "from \.\." tests/ --type py | head -5



🏁 Script executed:

# Look at the test for relative import detection
cat -n tests/test_analyzer_coverage_supplement.py | grep -A 10 "Test relative import"



🏁 Script executed:

# Check what dependency_analyzer does with relative imports
cat -n refactron/analyzers/dependency_analyzer.py | sed -n '1,50p'



🏁 Script executed:

# Search for any error logs or issues with module inference
rg "Failed to.*infer|infer.*failed" --type py -i



🏁 Script executed:

# Check if there's documentation about astroid's parse function behavior
rg "astroid.parse" refactron/ --type py -B 2 -A 2



🏁 Script executed:

# Check if there are actual test cases that would fail with empty module_name
rg "from \.\." tests/ --type py -B 2 -A 2



🏁 Script executed:

# Look at the actual test for relative import detection more closely
cat -n tests/test_analyzer_coverage_supplement.py | sed -n '195,220p'



🏁 Script executed:

# Check if symbol_table.py actually needs to resolve relative imports
rg "infer\|relative import\|package" refactron/analysis/symbol_table.py --type py | head -20



🏁 Script executed:

# Look for any actual issues in tests related to module resolution
rg "modname|module_name" tests/ --type py



🏁 Script executed:

# Check if modutils.modname_from_path can actually fail, and in what scenarios
python3 << 'EOF'
try:
    from astroid import modutils
    # Test with a random path
    result = modutils.modname_from_path("/some/random/path/not/in/sys/path.py")
    print(f"Result for path not in sys.path: {result}")
except Exception as e:
    print(f"Exception type: {type(e).__name__}, message: {e}")
EOF



🏁 Script executed:

# Check the actual astroid documentation or source comments about module_name and path
python3 << 'EOF'
import inspect
import astroid
# Get the signature and docstring of astroid.parse
sig = inspect.signature(astroid.parse)
print("astroid.parse signature:")
print(sig)
print("\nDocstring:")
print(astroid.parse.__doc__)
EOF



🏁 Script executed:

# Look for any actual reported issues or TODOs in the code about this
rg "TODO|FIXME|BUG|XXX" refactron/core/inference.py --type py



🏁 Script executed:

# Check if there are any test failures or skip markers related to this
rg "@pytest.mark.skip|@pytest.mark.xfail|@unittest.skip" tests/ --type py | grep -i "import\|module\|inference"



🏁 Script executed:

# Check what version of astroid is being used and what functions are available
python3 << 'EOF'
import astroid
print(f"Astroid version: {astroid.__version__}")
print(f"modutils attributes:")
from astroid import modutils
print([attr for attr in dir(modutils) if 'modname' in attr.lower() or 'path' in attr.lower()])
EOF



🏁 Script executed:

# Check if there's a requirements file or setup.py that specifies astroid version
cat setup.py pyproject.toml requirements.txt 2>/dev/null | grep -A 2 -B 2 "astroid" | head -20



🏁 Script executed:

# Look at what the code is actually importing from modutils
rg "from astroid import modutils|from astroid.modutils import" --type py



🏁 Script executed:

# Check if there's any version compatibility handling in the codebase
rg "astroid.__version__|astroid.version" --type py



🏁 Script executed:

# Verify the actual modpath_from_file function and how it works
python3 << 'EOF'
from astroid import modutils
import inspect
print("modpath_from_file signature:")
print(inspect.signature(modutils.modpath_from_file))
print("\nDocstring:")
print(modutils.modpath_from_file.__doc__)
EOF



🏁 Script executed:

# Test modpath_from_file to see what it returns
python3 << 'EOF'
from astroid import modutils
try:
    result = modutils.modpath_from_file("/some/random/path.py")
    print(f"modpath_from_file result: {result}")
except Exception as e:
    print(f"Exception: {type(e).__name__}: {e}")

# Try with an actual file
import tempfile
import os
with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as f:
    temp_path = f.name
try:
    result = modutils.modpath_from_file(temp_path)
    print(f"modpath_from_file for temp file: {result}")
except Exception as e:
    print(f"Exception for temp file: {type(e).__name__}: {e}")
finally:
    os.unlink(temp_path)
EOF



🏁 Script executed:

# Check if modname_from_path was available in older astroid versions
git log -p --all -S "modname_from_path" -- "*.py" | head -50 2>/dev/null || echo "Not found in git history"



🏁 Script executed:

# Check the actual error handling in parse_file - what exceptions are expected?
cat -n refactron/core/inference.py | sed -n '71,95p'



🏁 Script executed:

# Test if astroid can resolve relative imports with empty module_name but valid path
python3 << 'EOF'
import astroid
import tempfile
import os
from pathlib import Path

# Create a test package structure
temp_dir = tempfile.mkdtemp()
pkg_dir = os.path.join(temp_dir, "mypackage")
os.makedirs(pkg_dir)

# Create __init__.py
with open(os.path.join(pkg_dir, "__init__.py"), "w") as f:
    f.write("# init\n")

# Create utils.py
with open(os.path.join(pkg_dir, "utils.py"), "w") as f:
    f.write("def helper(): return 42\n")

# Create test.py with relative import
test_file = os.path.join(pkg_dir, "test.py")
with open(test_file, "w") as f:
    f.write("""from . import utils
def main():
    return utils.helper()
""")

# Test 1: Parse with empty module_name
print("Test 1: Parse with empty module_name and path")
with open(test_file) as f:
    code = f.read()
try:
    module1 = astroid.parse(code, module_name="", path=test_file)
    print(f"  Success. Module name: '{module1.name}'")
    # Try to infer the import
    for node in module1.nodes_of_class(astroid.ImportFrom):
        print(f"  Found ImportFrom: {node.as_string()}")
except Exception as e:
    print(f"  Error: {e}")

# Test 2: Parse with correct module_name
print("\nTest 2: Parse with correct module_name='mypackage.test' and path")
try:
    module2 = astroid.parse(code, module_name="mypackage.test", path=test_file)
    print(f"  Success. Module name: '{module2.name}'")
    for node in module2.nodes_of_class(astroid.ImportFrom):
        print(f"  Found ImportFrom: {node.as_string()}")
except Exception as e:
    print(f"  Error: {e}")

# Cleanup
import shutil
shutil.rmtree(temp_dir)
EOF



🏁 Script executed:

# Check what the correct way to get module name from file path should be
python3 << 'EOF'
from astroid import modutils
import os

# Show the correct usage of modpath_from_file
print("Correct function: modpath_from_file()")
print("It returns a list of module path components")
print("\nExample of how to convert modpath to modname:")
modpath = ["mypackage", "test"]
modname = ".".join(modpath)
print(f"  modpath: {modpath}")
print(f"  modname: {modname}")
EOF



🏁 Script executed:

# Test if inference actually works with empty module_name for relative imports
python3 << 'EOF'
import astroid
import tempfile
import os
from pathlib import Path

# Create a test package structure
temp_dir = tempfile.mkdtemp()
pkg_dir = os.path.join(temp_dir, "mypackage")
os.makedirs(pkg_dir)

# Create __init__.py
with open(os.path.join(pkg_dir, "__init__.py"), "w") as f:
    f.write("# init\n")

# Create utils.py
with open(os.path.join(pkg_dir, "utils.py"), "w") as f:
    f.write("def helper(): return 42\n")

# Create test.py with relative import
test_file = os.path.join(pkg_dir, "test.py")
with open(test_file) as f:
    code = f.read()

# Parse with empty module_name (current behavior)
print("Test: Inference with empty module_name")
module1 = astroid.parse(code, module_name="", path=test_file)
print(f"Module name: '{module1.name}'")

# Try to infer the imported symbol
for node in module1.nodes_of_class(astroid.nodes.ImportFrom):
    print(f"ImportFrom node: level={node.level}, names={node.names}")
    try:
        # Try to infer what 'utils' refers to
        inferred = list(node.infer_name("utils", None))
        print(f"  Inferred 'utils': {inferred}")
    except Exception as e:
        print(f"  Inference failed: {type(e).__name__}: {e}")

print("\n---\n")

# Parse with correct module_name (desired behavior)
print("Test: Inference with correct module_name='mypackage.test'")
module2 = astroid.parse(code, module_name="mypackage.test", path=test_file)
print(f"Module name: '{module2.name}'")

for node in module2.nodes_of_class(astroid.nodes.ImportFrom):
    print(f"ImportFrom node: level={node.level}, names={node.names}")
    try:
        inferred = list(node.infer_name("utils", None))
        print(f"  Inferred 'utils': {inferred}")
    except Exception as e:
        print(f"  Inference failed: {type(e).__name__}: {e}")

# Cleanup
import shutil
shutil.rmtree(temp_dir)
EOF


Length of output: 236


🏁 Script executed:

# Check if symbol_table.py actually needs to infer relative imports
cat -n refactron/analysis/symbol_table.py | sed -n '1,50p'


Length of output: 1841


🏁 Script executed:

# Check if there are any actual test failures when running tests
rg "test.*parse_file|test.*inference" tests/ --type py -l


Length of output: 94


🏁 Script executed:

# Fix the test - create the file first, then read it
python3 << 'EOF'
import astroid
import tempfile
import os
from pathlib import Path

# Create a test package structure
temp_dir = tempfile.mkdtemp()
pkg_dir = os.path.join(temp_dir, "mypackage")
os.makedirs(pkg_dir)

# Create __init__.py
with open(os.path.join(pkg_dir, "__init__.py"), "w") as f:
    f.write("# init\n")

# Create utils.py
with open(os.path.join(pkg_dir, "utils.py"), "w") as f:
    f.write("def helper(): return 42\n")

# Create test.py with relative import - BEFORE reading it
test_file = os.path.join(pkg_dir, "test.py")
with open(test_file, "w") as f:
    f.write("""from . import utils
def main():
    return utils.helper()
""")

# NOW read it
with open(test_file) as f:
    code = f.read()

# Parse with empty module_name (current behavior)
print("Test: Inference with empty module_name")
module1 = astroid.parse(code, module_name="", path=test_file)
print(f"Module name: '{module1.name}'")

# Try to infer the imported symbol
for node in module1.nodes_of_class(astroid.nodes.ImportFrom):
    print(f"ImportFrom node: level={node.level}, names={node.names}")
    try:
        inferred = list(node.infer_name("utils", None))
        print(f"  Inferred 'utils': {inferred}")
    except Exception as e:
        print(f"  Inference failed: {type(e).__name__}: {e}")

print("\n---\n")

# Parse with correct module_name (desired behavior)
print("Test: Inference with correct module_name='mypackage.test'")
module2 = astroid.parse(code, module_name="mypackage.test", path=test_file)
print(f"Module name: '{module2.name}'")

for node in module2.nodes_of_class(astroid.nodes.ImportFrom):
    print(f"ImportFrom node: level={node.level}, names={node.names}")
    try:
        inferred = list(node.infer_name("utils", None))
        print(f"  Inferred 'utils': {inferred}")
    except Exception as e:
        print(f"  Inference failed: {type(e).__name__}: {e}")

# Cleanup
import shutil
shutil.rmtree(temp_dir)
EOF


Length of output: 490


🏁 Script executed:

# Check what the test_rag_parser tests actually do
cat -n tests/test_rag_parser.py | sed -n '84,120p'


Length of output: 1826


🏁 Script executed:

# Look for any test that calls InferenceEngine.parse_file directly
rg "InferenceEngine\|parse_file" tests/ --type py -B 2 -A 5 | head -50


Length of output: 52


🏁 Script executed:

# Check if there are any actual test failures by examining test output or fixtures
cat -n tests/test_rag_parser.py | sed -n '1,50p'


Length of output: 1516


🏁 Script executed:

# Look at what parse_file returns and how it's used in RAG parser
cat -n refactron/rag/parser.py | sed -n '185,210p'


Length of output: 1234


🏁 Script executed:

# The key question: does symbol_table need correct module_name for its use case?
cat -n refactron/analysis/symbol_table.py | sed -n '240,280p'


Length of output: 2045


🏁 Script executed:

# Verify if modname_from_path is truly non-existent or if it's from a different import
rg "modname_from_path" . --type py


Length of output: 162


🏁 Script executed:

# Check if there's maybe a compatibility layer or if this is just dead code
rg "def modname_from_path" . --type py


Length of output: 52


🏁 Script executed:

# Check the git commit history for this specific function
git log --oneline -p refactron/core/inference.py | grep -A 10 -B 5 "modname_from_path" | head -40


Length of output: 731


🏁 Script executed:

# Check if maybe there was an older version of astroid that had this function
python3 << 'EOF'
# See if there's any way to resolve module name correctly from a path
from astroid import modutils
import os
import sys

test_path = "/some/nonexistent/path/mymodule.py"
print("Available functions in modutils related to module/path:")
for attr in sorted(dir(modutils)):
    if not attr.startswith('_'):
        print(f"  - {attr}")
EOF


Length of output: 1044


🏁 Script executed:

# Check if the code even runs - try to import and use it
python3 << 'EOF'
import sys
sys.path.insert(0, '.')
try:
    from refactron.core.inference import InferenceEngine
    print("Import successful")
    # Try to use parse_file
    import tempfile
    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
        f.write("x = 1\n")
        f.flush()
        try:
            result = InferenceEngine.parse_file(f.name)
            print(f"parse_file succeeded: {result}")
        except Exception as e:
            print(f"parse_file failed: {type(e).__name__}: {e}")
        finally:
            import os
            os.unlink(f.name)
except Exception as e:
    print(f"Import failed: {e}")
EOF


Length of output: 341


🏁 Script executed:

# Final verification: Check if symbol_table actually needs correct module_name
# by checking what operations it does on the parsed tree
cat -n refactron/analysis/symbol_table.py | sed -n '264,320p'


Length of output: 2504


🏁 Script executed:

# Verify the actual impact: does empty module_name cause problems for inference in practice?
python3 << 'EOF'
import astroid
import tempfile
import os

# Create a simple test
temp_dir = tempfile.mkdtemp()
test_file = os.path.join(temp_dir, "test.py")
with open(test_file, "w") as f:
    f.write("""
class MyClass:
    def method(self):
        return 42

def my_function():
    pass
""")

# Parse with empty module_name (current broken behavior)
with open(test_file) as f:
    code = f.read()
    
module = astroid.parse(code, module_name="", path=test_file)
print(f"Module name: '{module.name}'")
print(f"Module file: {module.file}")
print(f"Classes: {[c.name for c in module.nodes_of_class(astroid.nodes.ClassDef)]}")
print(f"Functions: {[f.name for f in module.nodes_of_class(astroid.nodes.FunctionDef)]}")

# Can we visit the nodes like symbol_table does?
for node in module.body:
    if isinstance(node, (astroid.nodes.ClassDef, astroid.nodes.FunctionDef)):
        print(f"Node: {node.name}, line: {node.lineno}")

import shutil
shutil.rmtree(temp_dir)
EOF


Length of output: 231


Fix the non-existent function call and properly resolve module names.

The code calls modutils.modname_from_path(), which does not exist in astroid 4.0.4 (or any other version). The resulting AttributeError is silently caught, leaving modname as an empty string. The correct approach is to use modpath_from_file() to derive the module name from the file path.

With an empty module_name, astroid still parses the code but loses package context. This breaks relative import resolution and package-aware inference, especially for files outside sys.path, and undermines the cache-correctness goal.

Instead of silently falling back to an empty module name, do one of the following:

  1. Use modutils.modpath_from_file() to properly resolve the module path and convert it to a module name (handling ImportError for files outside sys.path), or
  2. Pass a repository-derived module name into parse_file, or
  3. Fall back to manager.ast_from_file() only when module resolution fails.
🧰 Tools
🪛 GitHub Actions: CI

[error] Command failed with exit code 1: flake8 refactron --count --select=E9,F63,F7,F82 --show-source --statistics

🪛 Ruff (0.15.9)

[error] 82-83: try-except-pass detected, consider logging the exception

(S110)


[warning] 82-82: Do not catch blind exception: Exception

(BLE001)
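Both Ruff findings point the same way: catch narrow exception types and log the failure instead of passing silently. A minimal illustrative pattern (the function and names here are placeholders, not the library's code):

```python
import logging

logger = logging.getLogger(__name__)


def resolve_or_default(resolver, default: str = "") -> str:
    """Run a resolver callable, logging narrow failures instead of swallowing them."""
    try:
        return resolver()
    except (ImportError, ValueError) as exc:  # specific types, not a blind Exception
        logger.debug("module name resolution failed: %s", exc)
        return default
```

Applied to the flagged block, the bare try-except-pass around the module-name lookup would become a logged, narrowly typed fallback to the empty string.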

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@refactron/core/inference.py` around lines 77-86: the code calls the non-existent modutils.modname_from_path and silently falls back to an empty module_name, which loses package context. Replace that call with modutils.modpath_from_file(abs_path) and join the returned path components into a dotted module name, assigning it to modname, while handling ImportError/ValueError for files outside sys.path. If resolution still fails, either accept a repository-derived module name passed into parse_file or call manager.ast_from_file(abs_path) as a fallback instead of parsing with an empty module_name. Update both the block around the modutils usage and the return statement that calls astroid.parse(code, module_name=modname, path=abs_path).

        except (OSError, UnicodeDecodeError):
            # Fallback to manager if manual read fails
            try:
                return manager.ast_from_file(abs_path)
            except Exception as e:
                # Fallback for virtual/non-existent files if needed
                raise ValueError(f"Failed to parse {abs_path}: {e}")

    @staticmethod
    def infer_node(node: nodes.NodeNG, context: Optional[InferenceContext] = None) -> List[Any]: