Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 57 additions & 5 deletions refactron/analysis/symbol_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,23 @@ class SymbolTable:
symbols: Dict[str, Dict[str, Dict[str, Symbol]]] = field(default_factory=dict)
# Map: global_name -> Symbol (for easy cross-file lookup of exports)
exports: Dict[str, Symbol] = field(default_factory=dict)
# Metadata for caching (Map: file_path -> mtime)
metadata: Dict[str, float] = field(default_factory=dict)
Comment on lines +64 to +65
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify all mtime usage points to ensure precision consistency.
rg -n --type=py -C2 '\bst_mtime\b|\bst_mtime_ns\b'

Repository: Refactron-ai/Refactron_lib

Length of output: 2427


🏁 Script executed:

cat -n refactron/analysis/symbol_table.py | head -80

Repository: Refactron-ai/Refactron_lib

Length of output: 3232


🏁 Script executed:

sed -n '155,170p' refactron/analysis/symbol_table.py

Repository: Refactron-ai/Refactron_lib

Length of output: 647


🏁 Script executed:

sed -n '260,275p' refactron/analysis/symbol_table.py

Repository: Refactron-ai/Refactron_lib

Length of output: 754


🏁 Script executed:

sed -n '295,310p' refactron/analysis/symbol_table.py

Repository: Refactron-ai/Refactron_lib

Length of output: 552


Use st_mtime_ns (integer nanoseconds) instead of st_mtime (float seconds) for reliable cache invalidation.

The current code uses st_mtime (float seconds) and compares with ==, which can miss rapid file edits on filesystems with coarse timestamp resolution (e.g., 1-second granularity on some systems). Switching to st_mtime_ns (integer nanoseconds) provides sub-microsecond precision and avoids floating-point comparison issues.

Update:

  • Line 65: Change metadata: Dict[str, float] to Dict[str, int]
  • Line 159: Change st_mtime to st_mtime_ns
  • Line 301: Convert deserialized values with {k: int(v) for k, v in data.get("metadata", {}).items()}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@refactron/analysis/symbol_table.py` around lines 64 - 65, The metadata cache
currently types values as Dict[str, float] and uses st_mtime (float); change the
metadata annotation to Dict[str, int], replace any os.stat(...).st_mtime reads
with os.stat(...).st_mtime_ns so stored mtimes are integer nanoseconds, and
update the deserialization path that loads saved metadata to convert values to
ints (e.g., {k: int(v) for k, v in data.get("metadata", {}).items()}) so
comparisons use integer ns timestamps. Ensure you update the declaration of the
metadata field, the place(s) that set/check file mtime (use st_mtime_ns), and
the load/deserialize code that reconstructs metadata from persisted data.


def remove_file(self, file_path: str) -> None:
    """Remove all symbols and cached metadata associated with a specific file.

    Safe to call for files that were never analyzed or produced no
    symbols: the mtime entry in ``self.metadata`` is purged regardless,
    so deleted symbol-less files cannot linger in the cache.
    """
    if file_path in self.symbols:
        # Drop any exports owned by this file before discarding its scopes.
        for scope, names in self.symbols[file_path].items():
            if scope == "global":
                for name in list(names):
                    if name in self.exports and self.exports[name].file_path == file_path:
                        del self.exports[name]
        del self.symbols[file_path]
    # Always clear the mtime metadata — even when the file contributed no
    # symbols — otherwise stale entries survive file deletion forever.
    self.metadata.pop(file_path, None)
Comment on lines +67 to +80
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix stale metadata cleanup in remove_file.

At Line 69, the early return prevents metadata cleanup for files that had no extracted symbols. Deleted symbol-less files can remain forever in metadata.

Proposed fix
 def remove_file(self, file_path: str) -> None:
     """Remove all symbols associated with a specific file."""
-    if file_path not in self.symbols:
-        return
-        
-    for scope, names in self.symbols[file_path].items():
-        if scope == "global":
-            for name, symbol in list(names.items()):
-                if name in self.exports and self.exports[name].file_path == file_path:
-                    del self.exports[name]
-                        
-    del self.symbols[file_path]
-    if file_path in self.metadata:
-        del self.metadata[file_path]
+    if file_path in self.symbols:
+        for scope, names in self.symbols[file_path].items():
+            if scope == "global":
+                for name in list(names):
+                    if name in self.exports and self.exports[name].file_path == file_path:
+                        del self.exports[name]
+        del self.symbols[file_path]
+    self.metadata.pop(file_path, None)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def remove_file(self, file_path: str) -> None:
"""Remove all symbols associated with a specific file."""
if file_path not in self.symbols:
return
for scope, names in self.symbols[file_path].items():
if scope == "global":
for name, symbol in list(names.items()):
if name in self.exports and self.exports[name].file_path == file_path:
del self.exports[name]
del self.symbols[file_path]
if file_path in self.metadata:
del self.metadata[file_path]
def remove_file(self, file_path: str) -> None:
"""Remove all symbols associated with a specific file."""
if file_path in self.symbols:
for scope, names in self.symbols[file_path].items():
if scope == "global":
for name in list(names):
if name in self.exports and self.exports[name].file_path == file_path:
del self.exports[name]
del self.symbols[file_path]
self.metadata.pop(file_path, None)
🧰 Tools
🪛 Ruff (0.15.9)

[warning] 74-74: Loop control variable symbol not used within loop body

Rename unused symbol to _symbol

(B007)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@refactron/analysis/symbol_table.py` around lines 67 - 80, The early return in
remove_file prevents cleaning up self.metadata for files that had no extracted
symbols; update remove_file (in symbol_table.SymbolTable) to always delete
metadata for file_path even when file_path not in self.symbols—either remove the
early return and wrap the symbol-specific cleanup in an if block (if file_path
in self.symbols: ...del self.symbols[file_path]) or keep the early return but
perform metadata deletion before returning (if file_path not in self.symbols: if
file_path in self.metadata: del self.metadata[file_path]; return); preserve the
existing export-cleanup logic for files that do exist in self.symbols.


def add_symbol(self, symbol: Symbol) -> None:
"""Add a symbol to the table."""
Expand Down Expand Up @@ -121,18 +138,50 @@ def __init__(self, cache_dir: Optional[Path] = None):

def build_for_project(self, project_root: Path) -> SymbolTable:
"""Scan project and build symbol table."""
updated = False
if self.cache_dir:
cached = self._load_cache()
if cached:
# TODO: Implement incremental update logic here
return cached
self.symbol_table = cached

all_python_files = list(project_root.rglob("*.py"))
excluded_dirs = {".git", ".rag", "__pycache__", "venv", ".venv", "env", "node_modules"}
python_files = [
f for f in all_python_files if not any(excluded in f.parts for excluded in excluded_dirs)
]

current_file_paths = set()

python_files = list(project_root.rglob("*.py"))
for file_path in python_files:
file_str = str(file_path)
current_file_paths.add(file_str)
try:
mtime = file_path.stat().st_mtime
except OSError:
continue

if file_str in self.symbol_table.metadata and self.symbol_table.metadata[file_str] == mtime:
continue

# Need to update this file
if file_str in self.symbol_table.symbols:
self.symbol_table.remove_file(file_str)

self._analyze_file(file_path)
self.symbol_table.metadata[file_str] = mtime
updated = True
Comment on lines 170 to +172
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don’t advance metadata when analysis fails.

At Line 171, metadata is updated even if _analyze_file failed (Line 197-198 catches and suppresses). That can lock in missing symbols and skip future retries until file mtime changes again.

Proposed fix
-            self._analyze_file(file_path)
-            self.symbol_table.metadata[file_str] = mtime
+            analyzed = self._analyze_file(file_path)
+            if analyzed:
+                self.symbol_table.metadata[file_str] = mtime
+            else:
+                self.symbol_table.metadata.pop(file_str, None)
             updated = True
...
-    def _analyze_file(self, file_path: Path) -> None:
+    def _analyze_file(self, file_path: Path) -> bool:
         """Analyze a single file and populate symbols."""
         try:
             # We use astroid for better inference capabilities later
             tree = self.inference_engine.parse_file(str(file_path))
@@
             # Walk the tree
             self._visit_node(tree, str(file_path), "global")
+            return True
 
         except Exception as e:
             logger.warning(f"Failed to build symbol table for {file_path}: {e}")
+            return False

Also applies to: 188-199

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@refactron/analysis/symbol_table.py` around lines 170 - 172, The metadata
advance is happening regardless of whether _analyze_file succeeded; modify the
flow so that symbol_table.metadata[file_str] = mtime and updated = True are only
executed after a successful run of _analyze_file (i.e., no exception was
raised). Concretely, in the blocks that call _analyze_file (referencing
_analyze_file, file_str, mtime, symbol_table.metadata, and the updated flag),
move the metadata assignment and updated=True into the success path (or after
try/except only if no exception occurred) and ensure caught exceptions do not
update metadata so failed analyses will be retried later; apply the same change
for both occurrences that wrap _analyze_file (the first block and the block
around lines 188-199).


if self.cache_dir:
self._save_cache()
# Check for deleted files
deleted_files = set(self.symbol_table.symbols.keys()) - current_file_paths
for file_str in deleted_files:
self.symbol_table.remove_file(file_str)
updated = True

if getattr(self, "cache_dir", None) and updated:
try:
self._save_cache()
except Exception as e:
logger.warning(f"Failed to save cache in build_for_project: {e}")

return self.symbol_table

Expand Down Expand Up @@ -214,6 +263,7 @@ def _save_cache(self) -> None:
for f, scopes in self.symbol_table.symbols.items()
},
"exports": {n: sym.to_dict() for n, sym in self.symbol_table.exports.items()},
"metadata": self.symbol_table.metadata,
}

with open(cache_file, "w") as f:
Expand Down Expand Up @@ -247,6 +297,8 @@ def _load_cache(self) -> Optional[SymbolTable]:
# Reconstruct exports
for name, sym_data in data.get("exports", {}).items():
table.exports[name] = Symbol.from_dict(sym_data)

table.metadata = data.get("metadata", {})

return table

Expand Down
Loading