Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,43 @@ async def delete_repositories(request: DeleteReposRequest):
raise HTTPException(status_code=500, detail=str(e))


class CheckFreshnessRequest(BaseModel):
    """Request body for POST /check-repo-freshness: which indexed repos to probe."""
    repo_names: List[str] = Field(..., description="Repository names to check for updates")


class RefreshRepoRequest(BaseModel):
    """Request body for POST /refresh-repo: the single repo to pull and re-index."""
    repo_name: str = Field(..., description="Repository name to refresh")


@app.post("/check-repo-freshness")
async def check_repo_freshness(request: CheckFreshnessRequest):
"""Check whether indexed repositories are up-to-date with their remotes.

Performs a lightweight git fetch + SHA comparison without modifying the index.
"""
fastcode = _ensure_fastcode_initialized()

results = []
for name in request.repo_names:
info = await asyncio.to_thread(fastcode.check_repo_for_updates, name)
results.append(info)

return {"status": "success", "results": results}


@app.post("/refresh-repo")
async def refresh_repo(request: RefreshRepoRequest):
"""Pull latest changes for a repository and re-index it."""
fastcode = _ensure_fastcode_initialized()

result = await asyncio.to_thread(fastcode.refresh_repository, request.repo_name)

if result.get("error"):
raise HTTPException(status_code=500, detail=result["error"])

return {"status": "success", **result}


@app.post("/clear-cache")
async def clear_cache():
"""Clear cache"""
Expand Down
113 changes: 112 additions & 1 deletion fastcode/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def get_repository_info(self) -> Dict[str, Any]:
repo = Repo(self.repo_path)
info.update({
"branch": repo.active_branch.name,
"commit": repo.head.commit.hexsha[:8],
"commit": repo.head.commit.hexsha,
"remote_url": repo.remotes.origin.url if repo.remotes else None,
})
except Exception:
Expand All @@ -371,6 +371,117 @@ def get_repository_info(self) -> Dict[str, Any]:

return info

def get_head_commit(self, repo_path: Optional[str] = None) -> Optional[str]:
    """Return the full HEAD commit SHA for a repo on disk, or None.

    Falls back to ``self.repo_path`` when *repo_path* is not given; any
    missing path or unreadable repository yields None instead of raising.
    """
    candidate = repo_path if repo_path else self.repo_path
    if candidate and os.path.isdir(candidate):
        try:
            return Repo(candidate).head.commit.hexsha
        except Exception:
            pass
    return None

def check_for_updates(self, repo_path: Optional[str] = None) -> Dict[str, Any]:
    """Fetch from origin and compare local HEAD against the remote branch tip.

    Read-only freshness probe: nothing on disk is modified besides the
    fetched remote refs.

    Returns:
        dict with keys:
            has_updates (bool): True when the remote tip differs from HEAD.
            local_commit (str | None): SHA of the local HEAD.
            remote_commit (str | None): SHA of the remote branch tip.
            error (str | None): failure description, or None on success.
    """
    path = repo_path or self.repo_path
    report: Dict[str, Any] = {
        "has_updates": False,
        "local_commit": None,
        "remote_commit": None,
        "error": None,
    }
    if not (path and os.path.isdir(path)):
        report["error"] = f"Path does not exist: {path}"
        return report

    try:
        repo = Repo(path)
        if not repo.remotes:
            report["error"] = "No remote configured"
            return report

        head_sha = repo.head.commit.hexsha
        report["local_commit"] = head_sha

        repo.remotes.origin.fetch()

        upstream = repo.active_branch.tracking_branch()
        if upstream is not None:
            tip_sha = upstream.commit.hexsha
        else:
            # Shallow clones may lack tracking info; fall back to origin/<branch>.
            branch = repo.active_branch.name
            ref_name = f"origin/{branch}"
            if ref_name not in [str(r) for r in repo.refs]:
                report["error"] = f"Cannot determine remote ref for {branch}"
                return report
            tip_sha = repo.refs[ref_name].commit.hexsha

        report["remote_commit"] = tip_sha
        report["has_updates"] = head_sha != tip_sha
    except Exception as exc:
        report["error"] = str(exc)

    return report

def pull_updates(self, repo_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Pull latest changes from origin for the given repo.

    NOTE(review): this performs a plain ``origin.pull()``; shallow clones
    (depth=1) are NOT unshallowed first, so pulls may fail on them — the
    failure surfaces in ``error`` rather than being raised. Confirm whether
    shallow-clone handling is required for this deployment.

    Returns a dict with:
        - success (bool)
        - old_commit (str | None)
        - new_commit (str | None)
        - changed (bool) – whether HEAD actually moved
        - error (str | None)
    """
    target = repo_path or self.repo_path
    result: Dict[str, Any] = {
        "success": False,
        "old_commit": None,
        "new_commit": None,
        "changed": False,
        "error": None,
    }
    if not target or not os.path.isdir(target):
        result["error"] = f"Path does not exist: {target}"
        return result

    try:
        repo = Repo(target)
        if not repo.remotes:
            result["error"] = "No remote configured"
            return result

        # Record HEAD before pulling so callers can tell whether it moved.
        old_commit = repo.head.commit.hexsha
        result["old_commit"] = old_commit

        origin = repo.remotes.origin
        # pull() can raise (diverged history, auth, shallow clone); any
        # failure is reported via result["error"] rather than propagated.
        origin.pull()

        new_commit = repo.head.commit.hexsha
        result["new_commit"] = new_commit
        result["changed"] = old_commit != new_commit
        result["success"] = True
    except Exception as e:
        result["error"] = str(e)

    return result

def cleanup(self):
"""Clean up temporary directories"""
if self.temp_dir and os.path.exists(self.temp_dir):
Expand Down
103 changes: 98 additions & 5 deletions fastcode/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ def index_repository(self, force: bool = False):

# Save artifacts only when persistence is enabled
if self._should_persist_indexes():
# Save to cache with repository-specific name
self._save_to_cache(cache_name=repo_name)
indexed_commit = self.repo_info.get("commit") or self.loader.get_head_commit()
self._save_to_cache(cache_name=repo_name, indexed_commit=indexed_commit)

# Save BM25 and graph data
self.retriever.save_bm25(repo_name)
Expand Down Expand Up @@ -713,7 +713,7 @@ def _try_load_from_cache(self) -> bool:
self.logger.warning(f"Failed to load from cache: {e}")
return False

def _save_to_cache(self, cache_name: Optional[str] = None):
def _save_to_cache(self, cache_name: Optional[str] = None, indexed_commit: Optional[str] = None):
"""Save indexed data to cache"""
if not self._should_persist_indexes():
self.logger.info("Cache save disabled (ephemeral/evaluation mode)")
Expand All @@ -722,7 +722,7 @@ def _save_to_cache(self, cache_name: Optional[str] = None):
try:
if cache_name is None:
cache_name = self._get_cache_name()
self.vector_store.save(cache_name)
self.vector_store.save(cache_name, indexed_commit=indexed_commit)
self.logger.info(f"Saved index to cache: {cache_name}")
except Exception as e:
self.logger.warning(f"Failed to save to cache: {e}")
Expand Down Expand Up @@ -998,7 +998,8 @@ def load_multiple_repositories(self, sources: List[Dict[str, Any]]):
temp_vector_store.add_vectors(vectors_array, metadata)

# Save this repository's vector index separately
temp_vector_store.save(repo_name)
indexed_commit = repo_info.get("commit") or self.loader.get_head_commit()
temp_vector_store.save(repo_name, indexed_commit=indexed_commit)

# Build and save BM25 index for this repository
temp_retriever = HybridRetriever(self.config, temp_vector_store,
Expand Down Expand Up @@ -1272,6 +1273,98 @@ def _load_multi_repo_cache(self, repo_names: Optional[List[str]] = None) -> bool
self.logger.error(traceback.format_exc())
return False

def check_repo_for_updates(self, repo_name: str) -> Dict[str, Any]:
    """Report whether the stored index for *repo_name* is out of date.

    Staleness has two independent sources:
      * local: the on-disk HEAD moved since indexing (e.g. manual git pull);
      * remote: origin has commits the local clone does not.

    Returns:
        dict with keys: repo_name, stale (bool), indexed_commit,
        current_commit, remote_commit, has_remote_updates (bool),
        error (str | None).
    """
    report: Dict[str, Any] = {
        "repo_name": repo_name,
        "stale": False,
        "indexed_commit": None,
        "current_commit": None,
        "remote_commit": None,
        "has_remote_updates": False,
        "error": None,
    }

    indexed_sha = self.vector_store.get_indexed_commit(repo_name)
    report["indexed_commit"] = indexed_sha

    clone_path = os.path.join(self.loader.safe_repo_root, repo_name)
    if not os.path.isdir(clone_path):
        report["error"] = f"Repo directory not found: {clone_path}"
        return report

    head_sha = self.loader.get_head_commit(clone_path)
    report["current_commit"] = head_sha

    # Local staleness: HEAD moved since we indexed (e.g. manual git pull).
    if indexed_sha and head_sha and indexed_sha != head_sha:
        report["stale"] = True

    # Remote staleness: origin has newer commits.
    remote_info = self.loader.check_for_updates(clone_path)
    if remote_info.get("error"):
        report["error"] = remote_info["error"]
    else:
        report["remote_commit"] = remote_info.get("remote_commit")
        report["has_remote_updates"] = remote_info.get("has_updates", False)
        if report["has_remote_updates"]:
            report["stale"] = True

    return report

def refresh_repository(self, repo_name: str) -> Dict[str, Any]:
    """Pull the latest commits for *repo_name* and rebuild its index.

    Re-indexing is skipped when the pull does not move HEAD (no new commits).

    Returns:
        dict with keys: repo_name, pulled (bool), reindexed (bool),
        old_commit, new_commit, error (str | None).
    """
    outcome: Dict[str, Any] = {
        "repo_name": repo_name,
        "pulled": False,
        "reindexed": False,
        "old_commit": None,
        "new_commit": None,
        "error": None,
    }

    clone_path = os.path.join(self.loader.safe_repo_root, repo_name)
    if not os.path.isdir(clone_path):
        outcome["error"] = f"Repo directory not found: {clone_path}"
        return outcome

    pulled = self.loader.pull_updates(clone_path)
    outcome["old_commit"] = pulled.get("old_commit")
    outcome["new_commit"] = pulled.get("new_commit")

    if not pulled.get("success"):
        outcome["error"] = pulled.get("error", "Pull failed")
        return outcome

    outcome["pulled"] = True

    if not pulled.get("changed"):
        self.logger.info(f"No new commits for {repo_name}, skipping re-index")
        return outcome

    # HEAD moved: re-load the working tree and rebuild every index artifact,
    # then drop the scan cache so stale listings are not served.
    self.load_repository(clone_path, is_url=False)
    self.index_repository(force=True)
    self.vector_store.invalidate_scan_cache()
    outcome["reindexed"] = True
    return outcome

def cleanup(self):
"""Cleanup resources"""
self.loader.cleanup()
Expand Down
30 changes: 23 additions & 7 deletions fastcode/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,12 +442,13 @@ def filter_by_repositories(self, repo_names: List[str]) -> List[int]:
indices.append(i)
return indices

def save(self, name: str = "index"):
def save(self, name: str = "index", indexed_commit: Optional[str] = None):
"""
Save index and metadata to disk

Args:
name: Name for the saved files
indexed_commit: Git commit SHA that was indexed (stored for staleness detection)
"""
if self.in_memory:
self.logger.info("Skipping vector store save (in-memory mode enabled)")
Expand All @@ -464,13 +465,16 @@ def save(self, name: str = "index"):
faiss.write_index(self.index, index_path)

# Save metadata
payload = {
"metadata": self.metadata,
"dimension": self.dimension,
"distance_metric": self.distance_metric,
"index_type": self.index_type,
}
if indexed_commit:
payload["indexed_commit"] = indexed_commit
with open(metadata_path, 'wb') as f:
pickle.dump({
"metadata": self.metadata,
"dimension": self.dimension,
"distance_metric": self.distance_metric,
"index_type": self.index_type,
}, f)
pickle.dump(payload, f)

# Invalidate cache since we just modified the indexes
self.invalidate_scan_cache()
Expand Down Expand Up @@ -524,6 +528,18 @@ def load(self, name: str = "index") -> bool:
self.logger.error(f"Failed to load vector store: {e}")
return False

def get_indexed_commit(self, name: str = "index") -> Optional[str]:
    """Return the commit SHA recorded when *name* was saved, or None.

    Only the small metadata pickle is read; the vector index itself is
    never loaded. Missing files, unreadable pickles, and payloads saved
    before commit tracking existed all yield None.
    """
    pkl_path = os.path.join(self.persist_dir, f"{name}_metadata.pkl")
    try:
        with open(pkl_path, 'rb') as fh:
            return pickle.load(fh).get("indexed_commit")
    except Exception:
        return None

def clear(self):
"""Clear all vectors and metadata"""
if self.dimension:
Expand Down
Loading