Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions scripts/remote_upload_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,25 @@
import scripts.ingest_code as idx


def _cache_missing_stats(file_hashes: Dict[str, Any]) -> Tuple[bool, int, int]:
"""Return (is_stale, missing_count, checked_count) for cached paths."""
if not file_hashes:
return (False, 0, 0)
missing = 0
checked = 0
for path_str in file_hashes.keys():
try:
if not Path(path_str).exists():
missing += 1
except Exception:
missing += 1
checked += 1
if checked == 0:
return (False, 0, 0)
missing_ratio = missing / checked
return (missing_ratio >= 0.25, missing, checked)


def _find_git_root(start: Path) -> Optional[Path]:
"""Best-effort detection of the git repository root for a workspace.

Expand Down Expand Up @@ -337,6 +356,24 @@ def _load_local_cache_file_hashes(workspace_path: str, repo_name: Optional[str])
file_hashes = data.get("file_hashes", {})
if not isinstance(file_hashes, dict):
return {}
is_stale, missing, checked = _cache_missing_stats(file_hashes)
if is_stale:
logger.warning(
"[remote_upload] Detected stale local cache (%d/%d missing); clearing %s",
missing,
checked,
cache_path,
)
try:
cache_path.unlink(missing_ok=True) # type: ignore[arg-type]
except TypeError:
try:
cache_path.unlink()
except Exception:
pass
except Exception:
pass
return {}
return file_hashes
except Exception:
return {}
Expand Down
25 changes: 24 additions & 1 deletion scripts/standalone_upload_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,15 @@ def _load_cache(self) -> Dict[str, str]:
try:
with open(self.cache_file, 'r', encoding='utf-8') as f:
data = json.load(f)
return data.get("file_hashes", {})
file_hashes = data.get("file_hashes", {})
if self._cache_seems_stale(file_hashes):
logger.warning(
"[hash_cache] Detected stale cache with missing paths; resetting %s",
self.cache_file,
)
self._save_cache({})
return {}
return file_hashes
except Exception:
return {}

Expand Down Expand Up @@ -197,6 +205,21 @@ def remove_hash(self, file_path: str) -> None:
file_hashes.pop(abs_path, None)
self._save_cache(file_hashes)

def _cache_seems_stale(self, file_hashes: Dict[str, str]) -> bool:
"""Return True if a large portion of cached paths no longer exist on disk."""
total = len(file_hashes)
if total == 0:
return False
missing = 0
for path_str in file_hashes.keys():
try:
if not Path(path_str).exists():
missing += 1
except Exception:
missing += 1
missing_ratio = missing / total
return missing_ratio >= 0.25

# Create global cache instance (will be initialized in RemoteUploadClient)
_hash_cache: Optional[SimpleHashCache] = None

Expand Down
32 changes: 32 additions & 0 deletions scripts/upload_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,25 @@ def validate_bundle_format(bundle_path: Path) -> Dict[str, Any]:
except Exception as e:
raise ValueError(f"Invalid bundle format: {str(e)}")

def _cleanup_empty_dirs(path: Path, stop_at: Path) -> None:
"""Recursively remove empty directories up to stop_at (exclusive)."""
try:
path = path.resolve()
stop_at = stop_at.resolve()
except Exception:
pass
while True:
try:
if path == stop_at or not path.exists() or not path.is_dir():
break
if any(path.iterdir()):
break
path.rmdir()
path = path.parent
except Exception:
break


def process_delta_bundle(workspace_path: str, bundle_path: Path, manifest: Dict[str, Any]) -> Dict[str, int]:
"""Process delta bundle and return operation counts."""
operations_count = {
Expand Down Expand Up @@ -313,10 +332,23 @@ def process_delta_bundle(workspace_path: str, bundle_path: Path, manifest: Dict[
else:
operations_count["failed"] += 1

# Remove original source file if provided
source_rel_path = operation.get("source_path") or operation.get("source_relative_path")
if source_rel_path:
source_path = workspace / source_rel_path
if source_path.exists():
try:
source_path.unlink()
operations_count["deleted"] += 1
_cleanup_empty_dirs(source_path.parent, workspace)
except Exception as del_err:
logger.error(f"Error deleting source file for move {source_rel_path}: {del_err}")

elif op_type == "deleted":
# Delete file
if target_path.exists():
target_path.unlink()
_cleanup_empty_dirs(target_path.parent, workspace)
operations_count["deleted"] += 1
else:
operations_count["skipped"] += 1
Expand Down
Loading