class CheckFreshnessRequest(BaseModel):
    # Names of already-indexed repositories whose remotes should be checked.
    repo_names: List[str] = Field(..., description="Repository names to check for updates")


class RefreshRepoRequest(BaseModel):
    # Single repository to pull + re-index.
    repo_name: str = Field(..., description="Repository name to refresh")


@app.post("/check-repo-freshness")
async def check_repo_freshness(request: CheckFreshnessRequest):
    """Report whether each named repository's index matches its remote.

    Runs a lightweight git fetch + SHA comparison per repository; the index
    itself is never modified.  Each check runs on a worker thread via
    ``asyncio.to_thread`` so the event loop stays responsive.
    """
    fastcode = _ensure_fastcode_initialized()

    results = [
        await asyncio.to_thread(fastcode.check_repo_for_updates, repo)
        for repo in request.repo_names
    ]

    return {"status": "success", "results": results}


@app.post("/refresh-repo")
async def refresh_repo(request: RefreshRepoRequest):
    """Pull the latest commits for one repository and re-index it."""
    fastcode = _ensure_fastcode_initialized()

    outcome = await asyncio.to_thread(fastcode.refresh_repository, request.repo_name)

    # Surface pull/index failures as a 500 so the client sees the git error text.
    if outcome.get("error"):
        raise HTTPException(status_code=500, detail=outcome["error"])

    return {"status": "success", **outcome}
def get_head_commit(self, repo_path: Optional[str] = None) -> Optional[str]:
    """Return the full HEAD commit SHA for a repo on disk, or None.

    Args:
        repo_path: Directory of the git checkout; defaults to ``self.repo_path``.

    Returns:
        The full hex SHA of HEAD, or ``None`` when the path is missing or is
        not a readable git repository.
    """
    target = repo_path or self.repo_path
    if not target or not os.path.isdir(target):
        return None
    try:
        repo = Repo(target)
        return repo.head.commit.hexsha
    except Exception:
        # Bare/corrupt checkouts or unborn HEAD — treat the commit as unknown.
        return None

def check_for_updates(self, repo_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Fetch from origin and compare local HEAD with remote HEAD.

    Read-only with respect to the working tree: only ``git fetch`` runs, the
    checkout itself is never modified.

    Returns a dict with:
        - has_updates (bool)
        - local_commit (str | None)
        - remote_commit (str | None)
        - error (str | None)
    """
    target = repo_path or self.repo_path
    result: Dict[str, Any] = {
        "has_updates": False,
        "local_commit": None,
        "remote_commit": None,
        "error": None,
    }
    if not target or not os.path.isdir(target):
        result["error"] = f"Path does not exist: {target}"
        return result

    try:
        repo = Repo(target)
        if not repo.remotes:
            result["error"] = "No remote configured"
            return result

        local_commit = repo.head.commit.hexsha
        result["local_commit"] = local_commit

        origin = repo.remotes.origin
        origin.fetch()

        tracking = repo.active_branch.tracking_branch()
        if tracking is None:
            # Shallow clones may lack tracking info; fall back to the
            # conventionally-named remote ref "origin/<branch>".
            branch_name = repo.active_branch.name
            remote_ref = f"origin/{branch_name}"
            if remote_ref in [str(r) for r in repo.refs]:
                remote_commit = repo.refs[remote_ref].commit.hexsha
            else:
                result["error"] = f"Cannot determine remote ref for {branch_name}"
                return result
        else:
            remote_commit = tracking.commit.hexsha

        result["remote_commit"] = remote_commit
        result["has_updates"] = local_commit != remote_commit
    except Exception as e:
        # Covers detached HEAD (no active_branch), network failures, etc.
        result["error"] = str(e)

    return result

def pull_updates(self, repo_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Pull latest changes from origin for the given repo.

    NOTE(review): shallow clones (depth=1) are pulled as-is; ``git pull`` can
    fail or behave surprisingly on them.  If that becomes a problem, run
    ``git fetch --unshallow`` before pulling.  (An earlier version of this
    docstring claimed we unshallow/re-shallow automatically — the code does
    not do that.)

    Returns a dict with:
        - success (bool)
        - old_commit (str | None)
        - new_commit (str | None)
        - changed (bool) – whether HEAD actually moved
        - error (str | None)
    """
    target = repo_path or self.repo_path
    result: Dict[str, Any] = {
        "success": False,
        "old_commit": None,
        "new_commit": None,
        "changed": False,
        "error": None,
    }
    if not target or not os.path.isdir(target):
        result["error"] = f"Path does not exist: {target}"
        return result

    try:
        repo = Repo(target)
        if not repo.remotes:
            result["error"] = "No remote configured"
            return result

        old_commit = repo.head.commit.hexsha
        result["old_commit"] = old_commit

        origin = repo.remotes.origin
        origin.pull()

        new_commit = repo.head.commit.hexsha
        result["new_commit"] = new_commit
        result["changed"] = old_commit != new_commit
        result["success"] = True
    except Exception as e:
        # Merge conflicts, auth/network failures, detached HEAD, etc.
        result["error"] = str(e)

    return result
def check_repo_for_updates(self, repo_name: str) -> Dict[str, Any]:
    """
    Check whether a previously-indexed repo has upstream changes or
    local-disk changes since it was indexed.

    Two independent staleness signals are combined:
      * local  — HEAD on disk differs from the commit recorded at index time
                 (e.g. the user ran ``git pull`` manually);
      * remote — origin has commits the local checkout lacks.

    Returns a dict with:
        - stale (bool): True if the index is out of date
        - indexed_commit (str | None)
        - current_commit (str | None)
        - remote_commit (str | None)
        - has_remote_updates (bool)
        - error (str | None)
    """
    result: Dict[str, Any] = {
        "repo_name": repo_name,
        "stale": False,
        "indexed_commit": None,
        "current_commit": None,
        "remote_commit": None,
        "has_remote_updates": False,
        "error": None,
    }

    indexed_commit = self.vector_store.get_indexed_commit(repo_name)
    result["indexed_commit"] = indexed_commit

    repo_dir = os.path.join(self.loader.safe_repo_root, repo_name)
    if not os.path.isdir(repo_dir):
        result["error"] = f"Repo directory not found: {repo_dir}"
        return result

    current_commit = self.loader.get_head_commit(repo_dir)
    result["current_commit"] = current_commit

    # Local staleness: HEAD moved since we indexed (e.g. manual git pull)
    if indexed_commit and current_commit and indexed_commit != current_commit:
        result["stale"] = True

    # Remote staleness: origin has newer commits.  An error here (e.g. no
    # remote, network down) is reported but does not clear a local-staleness
    # verdict already reached above.
    update_info = self.loader.check_for_updates(repo_dir)
    if update_info.get("error"):
        result["error"] = update_info["error"]
    else:
        result["remote_commit"] = update_info.get("remote_commit")
        result["has_remote_updates"] = update_info.get("has_updates", False)
        if update_info.get("has_updates"):
            result["stale"] = True

    return result

def refresh_repository(self, repo_name: str) -> Dict[str, Any]:
    """
    Pull latest changes for a repo and re-index it if the index is out of date.

    Re-indexing happens when the pull moved HEAD *or* when the stored
    indexed_commit no longer matches the current HEAD.  The second condition
    covers the case where the user pulled manually: the pull here is a no-op
    (``changed`` is False) yet the index is still stale — the previous
    implementation skipped re-indexing in that situation.

    Returns a dict with pull results and indexing status:
        - pulled (bool), reindexed (bool)
        - old_commit / new_commit (str | None)
        - error (str | None)
    """
    result: Dict[str, Any] = {
        "repo_name": repo_name,
        "pulled": False,
        "reindexed": False,
        "old_commit": None,
        "new_commit": None,
        "error": None,
    }

    repo_dir = os.path.join(self.loader.safe_repo_root, repo_name)
    if not os.path.isdir(repo_dir):
        result["error"] = f"Repo directory not found: {repo_dir}"
        return result

    pull_result = self.loader.pull_updates(repo_dir)
    result["old_commit"] = pull_result.get("old_commit")
    result["new_commit"] = pull_result.get("new_commit")

    if not pull_result.get("success"):
        result["error"] = pull_result.get("error", "Pull failed")
        return result

    result["pulled"] = True

    # Also re-index when the saved index predates the (unchanged) HEAD.
    indexed_commit = self.vector_store.get_indexed_commit(repo_name)
    new_commit = pull_result.get("new_commit")
    index_behind_head = (
        indexed_commit is not None
        and new_commit is not None
        and indexed_commit != new_commit
    )

    if not pull_result.get("changed") and not index_behind_head:
        self.logger.info(f"No new commits for {repo_name}, skipping re-index")
        result["reindexed"] = False
        return result

    # Re-load and re-index, then drop any cached index scan results.
    self.load_repository(repo_dir, is_url=False)
    self.index_repository(force=True)
    self.vector_store.invalidate_scan_cache()
    result["reindexed"] = True
    return result
def get_indexed_commit(self, name: str = "index") -> Optional[str]:
    """Return the git SHA recorded when index *name* was last saved.

    Reads only the small metadata pickle (never the FAISS index), so it is
    cheap enough for per-request staleness checks.  Returns ``None`` when the
    metadata file is missing, unreadable, or predates commit tracking.
    """
    metadata_path = os.path.join(self.persist_dir, f"{name}_metadata.pkl")
    if not os.path.exists(metadata_path):
        return None
    try:
        with open(metadata_path, 'rb') as fh:
            payload = pickle.load(fh)
        return payload.get("indexed_commit")
    except Exception:
        # Corrupt or legacy pickle formats are treated the same as absent.
        return None
@cli.command()
@click.argument('repo_name')
@click.option('--config', '-c', help='Path to configuration file')
def refresh(repo_name, config):
    """Pull latest changes for a repository and re-index it"""

    fc = FastCode(config_path=config)

    click.echo(f"Refreshing '{repo_name}' …")
    outcome = fc.refresh_repository(repo_name)

    error = outcome.get("error")
    if error:
        click.echo(f"Error: {error}", err=True)
        sys.exit(1)

    def short(sha):
        # First 8 chars of a SHA, or a placeholder when unavailable.
        return (sha or "unknown")[:8]

    old, new = short(outcome.get("old_commit")), short(outcome.get("new_commit"))

    if outcome.get("reindexed"):
        click.echo(f"Refreshed {repo_name}: {old} -> {new} (re-indexed)")
    else:
        click.echo(f"Already up-to-date at {new}")
""" + global _staleness_warnings + _staleness_warnings = [] + fc = _get_fastcode() _apply_forced_env_excludes(fc) ready_names: List[str] = [] @@ -144,10 +154,31 @@ def _ensure_repos_ready(repos: List[str], ctx=None) -> List[str]: resolved_is_url = fc._infer_is_url(source) name = _repo_name_from_source(source, resolved_is_url) - # Already indexed – nothing to do + # Already indexed – check freshness before moving on if _is_repo_indexed(name): - logger.info(f"Repo '{name}' already indexed, skipping.") + logger.info(f"Repo '{name}' already indexed, checking freshness …") ready_names.append(name) + + try: + update_info = fc.check_repo_for_updates(name) + if update_info.get("stale"): + short_old = (update_info.get("indexed_commit") or "unknown")[:8] + short_new = ( + update_info.get("remote_commit") + or update_info.get("current_commit") + or "unknown" + )[:8] + msg = ( + f"Note: '{name}' index was built at commit {short_old} " + f"but the repo is now at {short_new}. " + f"Run the refresh_repo tool with repo_name=\"{name}\" " + f"to pull latest changes and re-index." + ) + _staleness_warnings.append(msg) + logger.warning(msg) + except Exception as e: + logger.debug(f"Staleness check failed for '{name}': {e}") + continue # Need to index @@ -186,7 +217,12 @@ def _ensure_repos_ready(repos: List[str], ctx=None) -> List[str]: # If signature introspection fails, fall back to the safest constructor shape. 
@mcp.tool()
def check_repo_freshness(repos: list[str]) -> str:
    """Check whether indexed repositories are up-to-date with their remotes.

    This is a lightweight check (git fetch + SHA comparison) that does NOT
    modify the index. Use refresh_repo to actually pull and re-index.

    Args:
        repos: Repository sources (URLs or local paths) or repo names to check.

    Returns:
        A freshness report for each repository.
    """
    fc = _get_fastcode()

    def _short(sha):
        # Abbreviate a commit SHA for display.
        return (sha or "unknown")[:8]

    def _report(source: str) -> str:
        # One report line per requested repository.
        name = _repo_name_from_source(source, fc._infer_is_url(source))
        if not _is_repo_indexed(name):
            return f" - {name}: not indexed"
        info = fc.check_repo_for_updates(name)
        if info.get("error"):
            return f" - {name}: error checking — {info['error']}"
        if info.get("stale"):
            indexed = _short(info.get("indexed_commit"))
            remote = _short(info.get("remote_commit") or info.get("current_commit"))
            return (
                f" - {name}: OUTDATED (indexed {indexed}, latest {remote}) "
                f"— use refresh_repo to update"
            )
        commit = _short(info.get("indexed_commit"))
        return f" - {name}: up-to-date ({commit})"

    return "\n".join(["Repository freshness report:"] + [_report(s) for s in repos])


@mcp.tool()
def refresh_repo(repo_name: str) -> str:
    """Pull the latest changes for a repository and re-index it.

    This performs a git pull on the cloned repo, then re-indexes if new
    commits were found. Use check_repo_freshness first to see which repos
    need refreshing.

    Args:
        repo_name: The repository name (as shown by list_indexed_repos).

    Returns:
        A summary of what changed and whether re-indexing occurred.
    """
    fc = _get_fastcode()
    _apply_forced_env_excludes(fc)

    if not _is_repo_indexed(repo_name):
        return f"Repository '{repo_name}' is not indexed. Use code_qa to index it first."

    outcome = fc.refresh_repository(repo_name)
    error = outcome.get("error")
    if error:
        return f"Failed to refresh '{repo_name}': {error}"

    old = (outcome.get("old_commit") or "unknown")[:8]
    new = (outcome.get("new_commit") or "unknown")[:8]

    if outcome.get("reindexed"):
        return (
            f"Repository '{repo_name}' refreshed successfully.\n"
            f" Previous commit: {old}\n"
            f" Current commit: {new}\n"
            f" Re-indexed: yes"
        )
    return f"Repository '{repo_name}' is already up-to-date at {new}."
@app.post("/api/refresh-repo")
async def refresh_repo(request: RefreshRepoRequest):
    """Pull the latest commits for one repository and re-index it.

    The blocking git + indexing work runs on a worker thread via
    ``asyncio.to_thread`` so the event loop is not stalled.
    """
    if fastcode_instance is None:
        raise HTTPException(status_code=500, detail="FastCode not initialized")

    outcome = await asyncio.to_thread(
        fastcode_instance.refresh_repository, request.repo_name
    )

    error = outcome.get("error")
    if error:
        raise HTTPException(status_code=500, detail=error)

    return {"status": "success", **outcome}