-
Notifications
You must be signed in to change notification settings - Fork 1
Final stabilization pass for VFS and Swarm #283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,8 +1,8 @@ | ||||||
| use anyhow::Result; | ||||||
| use serde::{Deserialize, Serialize}; | ||||||
| use sha2::{Digest, Sha256}; | ||||||
| use std::path::{Path, PathBuf}; | ||||||
| use tempfile::tempdir; | ||||||
| use thiserror::Error; | ||||||
| use tracing::{info, warn}; | ||||||
| use walkdir::WalkDir; | ||||||
|
|
||||||
|
|
@@ -31,6 +31,23 @@ pub struct DocumentRecord { | |||||
| pub chunks: Vec<Chunk>, | ||||||
| } | ||||||
|
|
||||||
| #[derive(Debug, Error)] | ||||||
| pub enum IndexerError { | ||||||
| #[error("IO error: {0}")] | ||||||
| Io(#[from] std::io::Error), | ||||||
|
|
||||||
| #[error("Git error: {0}")] | ||||||
| Git(#[from] git2::Error), | ||||||
|
|
||||||
| #[error("Strip prefix error: {0}")] | ||||||
| StripPrefix(#[from] std::path::StripPrefixError), | ||||||
|
|
||||||
| #[error("Other error: {0}")] | ||||||
| Other(String), | ||||||
| } | ||||||
|
|
||||||
| pub type Result<T> = std::result::Result<T, IndexerError>; | ||||||
|
|
||||||
| pub struct Indexer { | ||||||
| allowlist: Vec<String>, | ||||||
| max_file_size: u64, | ||||||
|
|
@@ -79,7 +96,9 @@ impl Indexer { | |||||
| url: &str, | ||||||
| ) -> Result<(RepositoryMetadata, PathBuf, Option<tempfile::TempDir>)> { | ||||||
| if Path::new(url).exists() { | ||||||
| let path = PathBuf::from(url).canonicalize()?; | ||||||
| let path = PathBuf::from(url) | ||||||
| .canonicalize() | ||||||
| .map_err(IndexerError::Io)?; | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The explicit
Suggested change
|
||||||
| let name = path | ||||||
| .file_name() | ||||||
| .unwrap_or_default() | ||||||
|
|
@@ -160,8 +179,12 @@ impl Indexer { | |||||
|
|
||||||
| /// Process a file into chunks and metadata. | ||||||
| pub fn process_file(&self, root: &Path, file_path: &Path) -> Result<DocumentRecord> { | ||||||
| let relative_path = file_path.strip_prefix(root)?.to_string_lossy().to_string(); | ||||||
| let content = std::fs::read_to_string(file_path)?; | ||||||
| let relative_path = file_path | ||||||
| .strip_prefix(root) | ||||||
| .map_err(IndexerError::StripPrefix)? | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||
| .to_string_lossy() | ||||||
| .to_string(); | ||||||
| let content = std::fs::read_to_string(file_path).map_err(IndexerError::Io)?; | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||
|
|
||||||
| let mut hasher = Sha256::new(); | ||||||
| hasher.update(content.as_bytes()); | ||||||
|
|
@@ -229,8 +252,13 @@ impl Indexer { | |||||
|
|
||||||
| #[async_trait::async_trait] | ||||||
| pub trait VectorAdapter: Send + Sync { | ||||||
| async fn index_document(&self, repo_id: &str, doc: DocumentRecord) -> Result<()>; | ||||||
| async fn search(&self, repo_id: &str, query: &str, limit: usize) -> Result<Vec<VectorRecord>>; | ||||||
| async fn index_document(&self, repo_id: &str, doc: DocumentRecord) -> anyhow::Result<()>; | ||||||
| async fn search( | ||||||
| &self, | ||||||
| repo_id: &str, | ||||||
| query: &str, | ||||||
| limit: usize, | ||||||
| ) -> anyhow::Result<Vec<VectorRecord>>; | ||||||
| } | ||||||
|
|
||||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||||
|
|
@@ -252,7 +280,7 @@ impl SurrealAdapter { | |||||
|
|
||||||
| #[async_trait::async_trait] | ||||||
| impl VectorAdapter for SurrealAdapter { | ||||||
| async fn index_document(&self, repo_id: &str, doc: DocumentRecord) -> Result<()> { | ||||||
| async fn index_document(&self, repo_id: &str, doc: DocumentRecord) -> anyhow::Result<()> { | ||||||
| let created_at = chrono::Utc::now().to_rfc3339(); | ||||||
|
|
||||||
| // 1. Create document record | ||||||
|
|
@@ -293,7 +321,12 @@ impl VectorAdapter for SurrealAdapter { | |||||
| Ok(()) | ||||||
| } | ||||||
|
|
||||||
| async fn search(&self, repo_id: &str, query: &str, limit: usize) -> Result<Vec<VectorRecord>> { | ||||||
| async fn search( | ||||||
| &self, | ||||||
| repo_id: &str, | ||||||
| query: &str, | ||||||
| limit: usize, | ||||||
| ) -> anyhow::Result<Vec<VectorRecord>> { | ||||||
| // Simple keyword-based search in SurrealDB as a fallback for pure vector search | ||||||
| let sql = "SELECT path, content FROM chunks WHERE doc_id CONTAINS $repo_id AND content CONTAINS $query LIMIT $limit"; | ||||||
| let results: Vec<VectorRecord> = self | ||||||
|
|
@@ -337,21 +370,22 @@ mod tests { | |||||
| } | ||||||
|
|
||||||
| #[test] | ||||||
| fn test_scan() { | ||||||
| let dir = tempdir().unwrap(); | ||||||
| fn test_scan() -> anyhow::Result<()> { | ||||||
| let dir = tempdir().expect("failed to create temp dir"); | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since the test function now returns
Suggested change
|
||||||
| let file_path = dir.path().join("test.rs"); | ||||||
| let mut file = File::create(&file_path).unwrap(); | ||||||
| writeln!(file, "fn main() {{}}").unwrap(); | ||||||
| let mut file = File::create(&file_path).expect("failed to create test file"); | ||||||
| writeln!(file, "fn main() {{}}").expect("failed to write to test file"); | ||||||
|
|
||||||
| let hidden_dir = dir.path().join(".git"); | ||||||
| std::fs::create_dir(&hidden_dir).unwrap(); | ||||||
| std::fs::create_dir(&hidden_dir).expect("failed to create hidden dir"); | ||||||
| let hidden_file = hidden_dir.join("config"); | ||||||
| File::create(&hidden_file).unwrap(); | ||||||
| File::create(&hidden_file).expect("failed to create hidden file"); | ||||||
|
|
||||||
| let indexer = Indexer::default(); | ||||||
| let files = indexer.scan(dir.path()); | ||||||
|
|
||||||
| assert_eq!(files.len(), 1); | ||||||
| assert!(files[0].ends_with("test.rs")); | ||||||
| Ok(()) | ||||||
| } | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -57,9 +57,49 @@ pub struct FileWatchEvent { | |
|
|
||
| #[async_trait] | ||
| pub trait VirtualFileSystem: Send + Sync { | ||
| /// Reads the content of a file at the given path. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// # use std::path::Path; | ||
| /// # use gestalt_core::ports::outbound::vfs::{VirtualFileSystem, OverlayFs}; | ||
| /// # tokio_test::block_on(async { | ||
| /// let vfs = OverlayFs::new(); | ||
| /// let data = vfs.read(Path::new("hello.txt")).await; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| /// # }); | ||
| /// ``` | ||
| async fn read(&self, path: &Path) -> Result<Vec<u8>>; | ||
|
|
||
| /// Writes data to a file at the given path, associated with an owner. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// # use std::path::Path; | ||
| /// # use gestalt_core::ports::outbound::vfs::{VirtualFileSystem, OverlayFs}; | ||
| /// # tokio_test::block_on(async { | ||
| /// let vfs = OverlayFs::new(); | ||
| /// vfs.write(Path::new("hello.txt"), b"world".to_vec(), "agent-1").await.unwrap(); | ||
| /// # }); | ||
| /// ``` | ||
| async fn write(&self, path: &Path, data: Vec<u8>, owner: &str) -> Result<()>; | ||
|
|
||
| /// Lists entries in the directory at the given path. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// # use std::path::Path; | ||
| /// # use gestalt_core::ports::outbound::vfs::{VirtualFileSystem, OverlayFs}; | ||
| /// # tokio_test::block_on(async { | ||
| /// let vfs = OverlayFs::new(); | ||
| /// let entries = vfs.list(Path::new(".")).await.unwrap(); | ||
| /// # }); | ||
| /// ``` | ||
| async fn list(&self, path: &Path) -> Result<Vec<PathBuf>>; | ||
|
|
||
| /// Checks if a file or directory exists at the given path. | ||
| async fn exists(&self, path: &Path) -> Result<bool>; | ||
|
|
||
| // Extended/Compatibility methods | ||
|
|
@@ -711,4 +751,42 @@ mod tests { | |
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn list_merges_overlay_and_disk_entries() -> Result<()> { | ||
| let tmp = tempdir()?; | ||
| let dir = tmp.path().to_path_buf(); | ||
| let disk_file = dir.join("disk.txt"); | ||
| let overlay_file = dir.join("overlay.txt"); | ||
|
|
||
| tokio::fs::write(&disk_file, "disk").await?; | ||
|
|
||
| let vfs = OverlayFs::new(); | ||
| vfs.write_string(&overlay_file, "overlay".to_string(), "agent-a") | ||
| .await?; | ||
|
|
||
| let entries = vfs.list(&dir).await?; | ||
| assert!(entries.contains(&disk_file)); | ||
| assert!(entries.contains(&overlay_file)); | ||
| assert_eq!(entries.len(), 2); | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn list_handles_non_existent_disk_dir_with_overlay_entries() -> Result<()> { | ||
| let tmp = tempdir()?; | ||
| let dir = tmp.path().join("ghost_dir"); | ||
|
|
||
| let vfs = OverlayFs::new(); | ||
| let overlay_file = dir.join("new.txt"); | ||
| vfs.write_string(&overlay_file, "new".to_string(), "agent-a") | ||
| .await?; | ||
|
|
||
| let entries = vfs.list(&dir).await?; | ||
| assert!(entries.contains(&overlay_file)); | ||
| assert_eq!(entries.len(), 1); | ||
|
|
||
| Ok(()) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| # Gestalt Swarm | ||
|
|
||
| Gestalt Swarm is a high-throughput parallel execution bridge for AI agent tasks. It is designed to run many short-lived tasks in parallel, making it ideal for large-scale codebase analysis or refactoring. | ||
|
|
||
| ## CLI Usage | ||
|
|
||
| The `swarm` command provides several subcommands for managing and running parallel tasks. | ||
|
|
||
| ### 1. Check Status | ||
| Verify that the swarm is active and ready to accept tasks. | ||
|
|
||
| ```bash | ||
| cargo run -p gestalt_swarm -- status | ||
| ``` | ||
|
|
||
| ### 2. Run a Task | ||
| Submit a goal to the swarm for parallel execution. | ||
|
|
||
| ```bash | ||
| cargo run -p gestalt_swarm -- run --goal "Refactor all unwrap() calls in gestalt_core" | ||
| ``` | ||
|
|
||
| ### 3. Verbose Output | ||
| Use the `--verbose` or `-v` flag to enable debug logging. | ||
|
|
||
| ```bash | ||
| cargo run -p gestalt_swarm -- -v status | ||
| ``` | ||
|
|
||
| ## Architecture | ||
|
|
||
| Swarm utilizes a lead agent to decompose complex goals into smaller, independent tasks which are then dispatched to a pool of worker agents. It leverages the Virtual File System (VFS) to ensure that parallel modifications do not conflict and can be merged safely. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
Othervariant in theIndexerErrorenum is currently unused within the codebase. If this is intended for future use, consider adding aTODOcomment or removing it to keep the public API clean and avoid dead code.