From 0f4fef0e8a3446109779a894553452d911ab0443 Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Thu, 7 Aug 2025 09:12:52 -0600 Subject: [PATCH 01/11] Basic filesystem watcher implementation --- oxen-rust/Cargo.toml | 2 +- oxen-rust/src/watcher/Cargo.toml | 25 +++ oxen-rust/src/watcher/src/cache.rs | 177 +++++++++++++++++ oxen-rust/src/watcher/src/cli.rs | 33 ++++ oxen-rust/src/watcher/src/error.rs | 28 +++ oxen-rust/src/watcher/src/event_processor.rs | 143 ++++++++++++++ oxen-rust/src/watcher/src/ipc.rs | 197 ++++++++++++++++++ oxen-rust/src/watcher/src/main.rs | 88 +++++++++ oxen-rust/src/watcher/src/monitor.rs | 198 +++++++++++++++++++ oxen-rust/src/watcher/src/protocol.rs | 96 +++++++++ 10 files changed, 986 insertions(+), 1 deletion(-) create mode 100644 oxen-rust/src/watcher/Cargo.toml create mode 100644 oxen-rust/src/watcher/src/cache.rs create mode 100644 oxen-rust/src/watcher/src/cli.rs create mode 100644 oxen-rust/src/watcher/src/error.rs create mode 100644 oxen-rust/src/watcher/src/event_processor.rs create mode 100644 oxen-rust/src/watcher/src/ipc.rs create mode 100644 oxen-rust/src/watcher/src/main.rs create mode 100644 oxen-rust/src/watcher/src/monitor.rs create mode 100644 oxen-rust/src/watcher/src/protocol.rs diff --git a/oxen-rust/Cargo.toml b/oxen-rust/Cargo.toml index a0abedfd7..08f24ed56 100644 --- a/oxen-rust/Cargo.toml +++ b/oxen-rust/Cargo.toml @@ -148,7 +148,7 @@ async-recursion = "1.1.1" [workspace] -members = ["src/cli", "src/lib", "src/server"] +members = ["src/cli", "src/lib", "src/server", "src/watcher"] [profile.release] codegen-units = 1 diff --git a/oxen-rust/src/watcher/Cargo.toml b/oxen-rust/src/watcher/Cargo.toml new file mode 100644 index 000000000..204329f72 --- /dev/null +++ b/oxen-rust/src/watcher/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "oxen-watcher" +version = "0.36.3" +edition = "2021" +license-file = "../../LICENSE" +description = "Filesystem watcher daemon for Oxen status acceleration" +homepage = "https://oxen.ai" +repository = "https://github.com/Oxen-AI/Oxen" + +[[bin]] +name = "oxen-watcher" +path = "src/main.rs" + +[dependencies] +liboxen = { path = "../lib" } +notify = "6.1" +tokio = { version = "1", features = ["full"] } +rmp-serde = "1.3.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +clap = { version = "4.4.2", features = ["cargo", "derive"] } +log = "0.4" +env_logger = "0.11" +chrono = "0.4" +thiserror = "2.0" \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/cache.rs b/oxen-rust/src/watcher/src/cache.rs new file mode 100644 index 000000000..19d3784ae --- /dev/null +++ b/oxen-rust/src/watcher/src/cache.rs @@ -0,0 +1,177 @@ +use crate::error::WatcherError; +use crate::protocol::{FileStatus, FileStatusType, StatusResult}; +use liboxen::model::LocalRepository; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::SystemTime; +use tokio::sync::RwLock; + +/// Memory-only status cache for fast access +pub struct StatusCache { + repo: LocalRepository, + /// In-memory cache + cache: Arc>, +} + +/// In-memory cache data structure +struct MemoryCache { + modified: HashMap, + added: HashMap, + removed: HashMap, + untracked: HashMap, + scan_complete: bool, + last_update: SystemTime, +} + +impl StatusCache { + /// Create a new status cache for a repository + pub fn new(repo_path: &Path) -> Result { + let repo = LocalRepository::from_dir(repo_path)?; + + // Initialize memory cache + let cache = Arc::new(RwLock::new(MemoryCache { + modified: HashMap::new(), + added: HashMap::new(), + removed: HashMap::new(), + untracked: HashMap::new(), + scan_complete: false, + last_update: SystemTime::now(), + })); + + Ok(Self { repo, cache }) + } + + /// Get the current status, optionally filtered by paths + pub async fn get_status(&self, paths: Option>) -> StatusResult { + let cache = self.cache.read().await; + + // Filter by paths if requested + let (modified, added, removed, untracked) = if let Some(paths) = paths { + let path_set: std::collections::HashSet<_> = paths.iter().collect(); + + ( + cache.modified.values() + .filter(|f| path_set.contains(&f.path)) + .cloned() + .collect(), + cache.added.values() + .filter(|f| path_set.contains(&f.path)) + .cloned() + .collect(), + cache.removed.keys() + .filter(|p| path_set.contains(p)) + .cloned() + .collect(), + cache.untracked.keys() + .filter(|p| path_set.contains(p)) + .cloned() + .collect(), + ) + } else { + ( + cache.modified.values().cloned().collect(), + cache.added.values().cloned().collect(), + cache.removed.keys().cloned().collect(), + cache.untracked.keys().cloned().collect(), + ) + }; + + StatusResult { + modified, + added, + removed, + untracked, + scan_complete: cache.scan_complete, + } + } + + /// Update a file's status in the cache + pub async fn update_file_status(&self, status: FileStatus) -> Result<(), WatcherError> { + let mut cache = self.cache.write().await; + + // Update memory cache + match status.status { + FileStatusType::Modified => { + cache.modified.insert(status.path.clone(), status.clone()); + cache.added.remove(&status.path); + cache.untracked.remove(&status.path); + } + FileStatusType::Added => { + cache.added.insert(status.path.clone(), status.clone()); + cache.modified.remove(&status.path); + cache.untracked.remove(&status.path); + } + FileStatusType::Removed => { + cache.removed.insert(status.path.clone(), status.clone()); + cache.modified.remove(&status.path); + cache.added.remove(&status.path); + cache.untracked.remove(&status.path); + } + FileStatusType::Untracked => { + cache.untracked.insert(status.path.clone(), status.clone()); + cache.modified.remove(&status.path); + cache.added.remove(&status.path); + } + } + + cache.last_update = SystemTime::now(); + + Ok(()) + } + + /// Batch update multiple file statuses + pub async fn batch_update(&self, statuses: Vec) -> Result<(), WatcherError> { + let mut cache = self.cache.write().await; + + for status in statuses { + // Update memory cache + match status.status { + FileStatusType::Modified => { + cache.modified.insert(status.path.clone(), status.clone()); + cache.added.remove(&status.path); + cache.untracked.remove(&status.path); + } + FileStatusType::Added => { + cache.added.insert(status.path.clone(), status.clone()); + cache.modified.remove(&status.path); + cache.untracked.remove(&status.path); + } + FileStatusType::Removed => { + cache.removed.insert(status.path.clone(), status.clone()); + cache.modified.remove(&status.path); + cache.added.remove(&status.path); + cache.untracked.remove(&status.path); + } + FileStatusType::Untracked => { + cache.untracked.insert(status.path.clone(), status.clone()); + cache.modified.remove(&status.path); + cache.added.remove(&status.path); + } + } + } + + cache.last_update = SystemTime::now(); + + Ok(()) + } + + /// Mark the initial scan as complete + pub async fn mark_scan_complete(&self) -> Result<(), WatcherError> { + let mut cache = self.cache.write().await; + cache.scan_complete = true; + Ok(()) + } + + /// Clear the entire cache + pub async fn clear(&self) -> Result<(), WatcherError> { + let mut cache = self.cache.write().await; + cache.modified.clear(); + cache.added.clear(); + cache.removed.clear(); + cache.untracked.clear(); + cache.scan_complete = false; + cache.last_update = SystemTime::now(); + Ok(()) + } +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/cli.rs b/oxen-rust/src/watcher/src/cli.rs new file mode 100644 index 000000000..98b7c4881 --- /dev/null +++ b/oxen-rust/src/watcher/src/cli.rs @@ -0,0 +1,33 @@ +use clap::{Parser, Subcommand}; +use std::path::PathBuf; + +#[derive(Parser)] +#[command(name = "oxen-watcher")] +#[command(about = "Filesystem watcher daemon for Oxen repositories")] +#[command(version)] +pub struct Args { + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Subcommand)] +pub enum Commands { + /// Start the filesystem watcher for a repository + Start { + /// Path to the repository + #[arg(short, long)] + repo: PathBuf, + }, + /// Stop the filesystem watcher for a repository + Stop { + /// Path to the repository + #[arg(short, long)] + repo: PathBuf, + }, + /// Check if the watcher is running for a repository + Status { + /// Path to the repository + #[arg(short, long)] + repo: PathBuf, + }, +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/error.rs b/oxen-rust/src/watcher/src/error.rs new file mode 100644 index 000000000..9a6887771 --- /dev/null +++ b/oxen-rust/src/watcher/src/error.rs @@ -0,0 +1,28 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum WatcherError { + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("Notify error: {0}")] + Notify(#[from] notify::Error), + + #[error("Serialization error: {0}")] + Serialization(#[from] rmp_serde::encode::Error), + + #[error("Deserialization error: {0}")] + Deserialization(#[from] rmp_serde::decode::Error), + + #[error("Oxen error: {0}")] + Oxen(#[from] liboxen::error::OxenError), + + #[error("Repository not found at: {0}")] + RepositoryNotFound(String), + + #[error("Watcher already running")] + AlreadyRunning, + + #[error("Failed to communicate with watcher: {0}")] + Communication(String), +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/event_processor.rs b/oxen-rust/src/watcher/src/event_processor.rs new file mode 100644 index 000000000..1fa54775f --- /dev/null +++ b/oxen-rust/src/watcher/src/event_processor.rs @@ -0,0 +1,143 @@ +use crate::cache::StatusCache; +use crate::protocol::{FileStatus, FileStatusType}; +use log::{debug, error, trace}; +use notify::{Event, EventKind}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::sync::mpsc; +use tokio::time; + +/// Processes filesystem events and updates the cache +pub struct EventProcessor { + cache: Arc, +} + +impl EventProcessor { + pub fn new(cache: Arc) -> Self { + Self { cache } + } + + /// Run the event processing loop + pub async fn run(self, mut event_rx: mpsc::Receiver) { + // Buffer for coalescing events + let mut event_buffer: HashMap = HashMap::new(); + let coalesce_window = Duration::from_millis(100); + let batch_size = 1000; + + let mut interval = time::interval(coalesce_window); + + loop { + tokio::select! { + // Process incoming events + Some(event) = event_rx.recv() => { + self.handle_event(event, &mut event_buffer); + + // Flush if buffer is getting large + if event_buffer.len() >= batch_size { + self.flush_events(&mut event_buffer).await; + } + } + + // Periodic flush of coalesced events + _ = interval.tick() => { + if !event_buffer.is_empty() { + self.flush_events(&mut event_buffer).await; + } + } + } + } + } + + /// Handle a single filesystem event + fn handle_event(&self, event: Event, buffer: &mut HashMap) { + trace!("Received event: {:?}", event); + + for path in event.paths { + // Skip .oxen directory + if path.components().any(|c| c.as_os_str() == ".oxen") { + continue; + } + + // Skip non-file events for now + if path.is_dir() { + continue; + } + + // Coalesce events for the same path + buffer.insert(path, (event.kind, Instant::now())); + } + } + + /// Flush buffered events to the cache + async fn flush_events(&self, buffer: &mut HashMap) { + if buffer.is_empty() { + return; + } + + debug!("Flushing {} events to cache", buffer.len()); + + let mut updates = Vec::new(); + let now = Instant::now(); + let stale_threshold = Duration::from_millis(200); + + // Process each buffered event + for (path, (kind, timestamp)) in buffer.drain() { + // Skip stale events + if now.duration_since(timestamp) > stale_threshold { + continue; + } + + // Determine the status type based on event kind and file existence + let status_type = match kind { + EventKind::Create(_) => { + // New file created + FileStatusType::Untracked + } + EventKind::Modify(_) => { + // File modified - need to check if it's tracked + // For now, assume modified if it exists + FileStatusType::Modified + } + EventKind::Remove(_) => { + // File removed + FileStatusType::Removed + } + EventKind::Any | EventKind::Access(_) | EventKind::Other => { + // Skip these events + continue; + } + }; + + // Get file metadata if it exists + let (mtime, size) = if let Ok(metadata) = std::fs::metadata(&path) { + ( + metadata.modified().unwrap_or(std::time::SystemTime::now()), + metadata.len(), + ) + } else if status_type == FileStatusType::Removed { + // File was removed, use current time and zero size + (std::time::SystemTime::now(), 0) + } else { + // Skip if we can't get metadata for non-removed files + continue; + }; + + updates.push(FileStatus { + path: path.clone(), + mtime, + size, + hash: None, // Will be computed later if needed + status: status_type, + }); + } + + // Batch update the cache + if !updates.is_empty() { + if let Err(e) = self.cache.batch_update(updates).await { + error!("Failed to update cache: {}", e); + } + } + } +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/ipc.rs b/oxen-rust/src/watcher/src/ipc.rs new file mode 100644 index 000000000..5c44b4684 --- /dev/null +++ b/oxen-rust/src/watcher/src/ipc.rs @@ -0,0 +1,197 @@ +use crate::cache::StatusCache; +use crate::error::WatcherError; +use crate::protocol::{WatcherRequest, WatcherResponse}; +use log::{debug, error, info}; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::net::{UnixListener, UnixStream}; + +/// IPC server that handles client requests +pub struct IpcServer { + repo_path: PathBuf, + cache: Arc, +} + +impl IpcServer { + pub fn new(repo_path: PathBuf, cache: Arc) -> Self { + Self { repo_path, cache } + } + + /// Run the IPC server + pub async fn run(self) -> Result<(), WatcherError> { + let socket_path = self.repo_path.join(".oxen/watcher.sock"); + + // Remove old socket if it exists + if socket_path.exists() { + std::fs::remove_file(&socket_path)?; + } + + // Create the Unix socket listener + let listener = UnixListener::bind(&socket_path)?; + info!("IPC server listening on {}", socket_path.display()); + + // Track last request time for idle timeout + let idle_timeout = Duration::from_secs(600); // 10 minutes + let mut last_request = Instant::now(); + + loop { + // Accept connections with timeout check + tokio::select! { + result = listener.accept() => { + match result { + Ok((stream, _)) => { + last_request = Instant::now(); + + // Handle client in a separate task + let cache = self.cache.clone(); + tokio::spawn(async move { + if let Err(e) = handle_client(stream, cache).await { + error!("Error handling client: {}", e); + } + }); + } + Err(e) => { + error!("Failed to accept connection: {}", e); + } + } + } + + // Check for idle timeout + _ = tokio::time::sleep(Duration::from_secs(60)) => { + if last_request.elapsed() > idle_timeout { + info!("Idle timeout reached, shutting down"); + break; + } + } + } + } + + Ok(()) + } +} + +/// Handle a single client connection +async fn handle_client( + mut stream: UnixStream, + cache: Arc, +) -> Result<(), WatcherError> { + // Read message length (4 bytes, little-endian) + let mut len_buf = [0u8; 4]; + stream.read_exact(&mut len_buf).await?; + let len = u32::from_le_bytes(len_buf) as usize; + + // Sanity check message size (max 10MB) + if len > 10 * 1024 * 1024 { + error!("Message too large: {} bytes", len); + return Err(WatcherError::Communication("Message too large".to_string())); + } + + // Read message body + let mut msg_buf = vec![0u8; len]; + stream.read_exact(&mut msg_buf).await?; + + // Deserialize request + let request = WatcherRequest::from_bytes(&msg_buf)?; + debug!("Received request: {:?}", request); + + // Process request + let response = match request { + WatcherRequest::GetStatus { paths } => { + let status = cache.get_status(paths).await; + WatcherResponse::Status(status) + } + + WatcherRequest::GetSummary => { + let status = cache.get_status(None).await; + WatcherResponse::Summary { + modified: status.modified.len(), + added: status.added.len(), + removed: status.removed.len(), + untracked: status.untracked.len(), + last_updated: std::time::SystemTime::now(), + } + } + + WatcherRequest::Refresh { paths } => { + // TODO: Implement forced refresh + debug!("Refresh requested for {:?}", paths); + WatcherResponse::Ok + } + + WatcherRequest::Shutdown => { + info!("Shutdown requested via IPC"); + // Send response before shutting down + let response = WatcherResponse::Ok; + send_response(&mut stream, &response).await?; + + // Exit the process + std::process::exit(0); + } + + WatcherRequest::Ping => { + WatcherResponse::Ok + } + }; + + // Send response + send_response(&mut stream, &response).await?; + + Ok(()) +} + +/// Send a response to the client +async fn send_response( + stream: &mut UnixStream, + response: &WatcherResponse, +) -> Result<(), WatcherError> { + // Serialize response + let msg = response.to_bytes()?; + + // Write length prefix + let len = msg.len() as u32; + stream.write_all(&len.to_le_bytes()).await?; + + // Write message + stream.write_all(&msg).await?; + stream.flush().await?; + + Ok(()) +} + +/// Send a request to the watcher (used by CLI) +pub async fn send_request( + socket_path: &PathBuf, + request: WatcherRequest, +) -> Result { + // Connect to the socket + let mut stream = UnixStream::connect(socket_path) + .await + .map_err(|e| WatcherError::Communication(format!("Failed to connect: {}", e)))?; + + // Serialize request + let msg = request.to_bytes()?; + + // Send length prefix + let len = msg.len() as u32; + stream.write_all(&len.to_le_bytes()).await?; + + // Send message + stream.write_all(&msg).await?; + stream.flush().await?; + + // Read response length + let mut len_buf = [0u8; 4]; + stream.read_exact(&mut len_buf).await?; + let len = u32::from_le_bytes(len_buf) as usize; + + // Read response body + let mut msg_buf = vec![0u8; len]; + stream.read_exact(&mut msg_buf).await?; + + // Deserialize response + let response = WatcherResponse::from_bytes(&msg_buf)?; + + Ok(response) +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/main.rs b/oxen-rust/src/watcher/src/main.rs new file mode 100644 index 000000000..e4fc7beec --- /dev/null +++ b/oxen-rust/src/watcher/src/main.rs @@ -0,0 +1,88 @@ +mod cache; +mod cli; +mod error; +mod event_processor; +mod ipc; +mod monitor; +mod protocol; + +use clap::Parser; +use log::info; +use std::path::PathBuf; + +use crate::cli::Args; +use crate::error::WatcherError; + +#[tokio::main] +async fn main() -> Result<(), WatcherError> { + env_logger::init(); + + let args = Args::parse(); + + match args.command { + cli::Commands::Start { repo } => { + info!("Starting watcher for repository: {}", repo.display()); + start_watcher(repo).await + } + cli::Commands::Stop { repo } => { + info!("Stopping watcher for repository: {}", repo.display()); + stop_watcher(repo).await + } + cli::Commands::Status { repo } => { + info!("Checking watcher status for repository: {}", repo.display()); + check_status(repo).await + } + } +} + +async fn start_watcher(repo_path: PathBuf) -> Result<(), WatcherError> { + // Check if watcher is already running + if is_watcher_running(&repo_path).await? { + info!("Watcher is already running for this repository"); + return Ok(()); + } + + // Initialize and run the watcher + let watcher = monitor::FileSystemWatcher::new(repo_path)?; + watcher.run().await +} + +async fn stop_watcher(repo_path: PathBuf) -> Result<(), WatcherError> { + let socket_path = repo_path.join(".oxen/watcher.sock"); + + // Send shutdown request + match ipc::send_request(&socket_path, protocol::WatcherRequest::Shutdown).await { + Ok(_) => { + info!("Watcher stopped successfully"); + Ok(()) + } + Err(e) => { + log::warn!("Failed to stop watcher: {}", e); + // Clean up pid file if present + let pid_file = repo_path.join(".oxen/watcher.pid"); + if pid_file.exists() { + std::fs::remove_file(pid_file)?; + } + Ok(()) + } + } +} + +async fn check_status(repo_path: PathBuf) -> Result<(), WatcherError> { + if is_watcher_running(&repo_path).await? { + println!("Watcher is running"); + } else { + println!("Watcher is not running"); + } + Ok(()) +} + +async fn is_watcher_running(repo_path: &PathBuf) -> Result { + let socket_path = repo_path.join(".oxen/watcher.sock"); + + // Try to ping the watcher + match ipc::send_request(&socket_path, protocol::WatcherRequest::Ping).await { + Ok(protocol::WatcherResponse::Ok) => Ok(true), + _ => Ok(false), + } +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/monitor.rs b/oxen-rust/src/watcher/src/monitor.rs new file mode 100644 index 000000000..f5a9da507 --- /dev/null +++ b/oxen-rust/src/watcher/src/monitor.rs @@ -0,0 +1,198 @@ +use crate::cache::StatusCache; +use crate::error::WatcherError; +use crate::event_processor::EventProcessor; +use crate::ipc::IpcServer; +use liboxen::model::LocalRepository; +use log::{error, info, warn}; +use notify::{Event, RecursiveMode, Watcher}; +use std::path::PathBuf; +use std::sync::Arc; +use tokio::sync::mpsc; + +/// Main filesystem watcher that coordinates all components +pub struct FileSystemWatcher { + repo_path: PathBuf, + cache: Arc, +} + +impl FileSystemWatcher { + /// Create a new filesystem watcher for a repository + pub fn new(repo_path: PathBuf) -> Result { + // Verify repository exists + if !repo_path.join(".oxen").exists() { + return Err(WatcherError::RepositoryNotFound( + repo_path.display().to_string(), + )); + } + + let cache = Arc::new(StatusCache::new(&repo_path)?); + + Ok(Self { repo_path, cache }) + } + + /// Run the watcher daemon + pub async fn run(self) -> Result<(), WatcherError> { + info!( + "Starting filesystem watcher for {}", + self.repo_path.display() + ); + + // Write PID file + let pid_file = self.repo_path.join(".oxen/watcher.pid"); + std::fs::write(&pid_file, std::process::id().to_string())?; + + // Create channel for filesystem events + let (event_tx, event_rx) = mpsc::channel::(1000); + + // Create the notify watcher + let mut watcher = notify::recommended_watcher(move |res: Result| { + match res { + Ok(event) => { + // Try to send event, drop if channel is full + let _ = event_tx.blocking_send(event); + } + Err(e) => error!("Filesystem watch error: {}", e), + } + })?; + + // Watch the repository directory (excluding .oxen) + watcher.watch(&self.repo_path, RecursiveMode::Recursive)?; + info!("Watching directory: {}", self.repo_path.display()); + + // Start the event processor + let processor = EventProcessor::new(self.cache.clone()); + let processor_handle = tokio::spawn(async move { processor.run(event_rx).await }); + + // Start the IPC server + let ipc_server = IpcServer::new(self.repo_path.clone(), self.cache.clone()); + let ipc_handle = tokio::spawn(async move { + if let Err(e) = ipc_server.run().await { + error!("IPC server error: {}", e); + } + }); + + // Start initial scan + let cache_clone = self.cache.clone(); + let repo_path_clone = self.repo_path.clone(); + let _scan_handle = tokio::spawn(async move { + if let Err(e) = initial_scan(repo_path_clone, cache_clone).await { + error!("Initial scan error: {}", e); + } + }); + + // Wait for shutdown signal or handle termination + tokio::select! { + _ = tokio::signal::ctrl_c() => { + info!("Received shutdown signal"); + } + _ = processor_handle => { + warn!("Event processor terminated"); + } + _ = ipc_handle => { + warn!("IPC server terminated"); + } + } + + // Cleanup + info!("Shutting down filesystem watcher"); + drop(watcher); + + // Remove PID file + let _ = std::fs::remove_file(&pid_file); + + // Remove socket file + let socket_path = self.repo_path.join(".oxen/watcher.sock"); + let _ = std::fs::remove_file(&socket_path); + + Ok(()) + } +} + +/// Perform initial scan of the repository +async fn initial_scan(repo_path: PathBuf, cache: Arc) -> Result<(), WatcherError> { + info!("Starting initial repository scan"); + + // Load the repository + let repo = LocalRepository::from_dir(&repo_path)?; + + // Use Oxen's existing status implementation for initial state + match liboxen::repositories::status::status(&repo) { + Ok(status) => { + let mut file_statuses = Vec::new(); + + // Convert Oxen status to our format + for path in status.modified_files { + if let Ok(metadata) = std::fs::metadata(&repo_path.join(&path)) { + file_statuses.push(crate::protocol::FileStatus { + path: path.clone(), + mtime: metadata.modified().unwrap_or(std::time::SystemTime::now()), + size: metadata.len(), + hash: None, + status: crate::protocol::FileStatusType::Modified, + }); + } + } + + for (path, entry) in status.staged_files { + let file_status_type = match entry.status { + liboxen::model::StagedEntryStatus::Added => { + crate::protocol::FileStatusType::Added + } + liboxen::model::StagedEntryStatus::Modified => { + crate::protocol::FileStatusType::Modified + } + liboxen::model::StagedEntryStatus::Removed => { + crate::protocol::FileStatusType::Removed + } + liboxen::model::StagedEntryStatus::Unmodified => { + continue; // Skip unmodified files + } + }; + + let (mtime, size) = if let Ok(metadata) = std::fs::metadata(&repo_path.join(&path)) + { + ( + metadata.modified().unwrap_or(std::time::SystemTime::now()), + metadata.len(), + ) + } else { + // File might not exist if it was removed + (std::time::SystemTime::now(), 0) + }; + + file_statuses.push(crate::protocol::FileStatus { + path: path.clone(), + mtime, + size, + hash: Some(entry.hash), + status: file_status_type, + }); + } + + for path in status.untracked_files { + if let Ok(metadata) = std::fs::metadata(&repo_path.join(&path)) { + file_statuses.push(crate::protocol::FileStatus { + path: path.clone(), + mtime: metadata.modified().unwrap_or(std::time::SystemTime::now()), + size: metadata.len(), + hash: None, + status: crate::protocol::FileStatusType::Untracked, + }); + } + } + + // Batch update the cache + cache.batch_update(file_statuses).await?; + cache.mark_scan_complete().await?; + + info!("Initial scan complete"); + } + Err(e) => { + error!("Failed to get initial status: {}", e); + // Mark scan as complete anyway to avoid blocking + cache.mark_scan_complete().await?; + } + } + + Ok(()) +} diff --git a/oxen-rust/src/watcher/src/protocol.rs b/oxen-rust/src/watcher/src/protocol.rs new file mode 100644 index 000000000..9ab9978f1 --- /dev/null +++ b/oxen-rust/src/watcher/src/protocol.rs @@ -0,0 +1,96 @@ +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use std::time::SystemTime; + +/// Request messages sent from CLI to Watcher +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum WatcherRequest { + /// Get the current status of the repository + GetStatus { + /// Optional paths to filter status for + paths: Option>, + }, + /// Get a summary of changes (just counts) + GetSummary, + /// Force a refresh/rescan of specific paths + Refresh { + paths: Vec, + }, + /// Shutdown the watcher daemon + Shutdown, + /// Health check ping + Ping, +} + +/// Response messages sent from Watcher to CLI +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum WatcherResponse { + /// Full status result + Status(StatusResult), + /// Summary of changes + Summary { + modified: usize, + added: usize, + removed: usize, + untracked: usize, + last_updated: SystemTime, + }, + /// Simple acknowledgment + Ok, + /// Error response + Error(String), +} + +/// Detailed status result +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StatusResult { + pub modified: Vec, + pub added: Vec, + pub removed: Vec, + pub untracked: Vec, + /// False if still doing initial scan + pub scan_complete: bool, +} + +/// Status of a single file +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileStatus { + pub path: PathBuf, + pub mtime: SystemTime, + pub size: u64, + pub hash: Option, + pub status: FileStatusType, +} + +/// Type of file status +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum FileStatusType { + Modified, + Added, + Removed, + Untracked, +} + +impl WatcherRequest { + /// Serialize request to MessagePack bytes + pub fn to_bytes(&self) -> Result, rmp_serde::encode::Error> { + rmp_serde::to_vec(self) + } + + /// Deserialize request from MessagePack bytes + pub fn from_bytes(bytes: &[u8]) -> Result { + rmp_serde::from_slice(bytes) + } +} + +impl WatcherResponse { + /// Serialize response to MessagePack bytes + pub fn to_bytes(&self) -> Result, rmp_serde::encode::Error> { + rmp_serde::to_vec(self) + } + + /// Deserialize response from MessagePack bytes + pub fn from_bytes(bytes: &[u8]) -> Result { + rmp_serde::from_slice(bytes) + } +} \ No newline at end of file From b7c8070afb3971607b481fdd40824f0fc41d38a2 Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Sat, 9 Aug 2025 13:41:50 -0600 Subject: [PATCH 02/11] Add unit tests for watcher modules --- oxen-rust/src/watcher/Cargo.toml | 10 +- oxen-rust/src/watcher/src/cache.rs | 11 +- oxen-rust/src/watcher/src/cache_test.rs | 231 ++++++++++++++++++ oxen-rust/src/watcher/src/error.rs | 1 + oxen-rust/src/watcher/src/event_processor.rs | 3 + .../src/watcher/src/event_processor_test.rs | 230 +++++++++++++++++ oxen-rust/src/watcher/src/lib.rs | 10 + oxen-rust/src/watcher/src/main.rs | 4 +- oxen-rust/src/watcher/src/monitor.rs | 6 +- oxen-rust/src/watcher/src/protocol.rs | 3 + oxen-rust/src/watcher/src/protocol_test.rs | 178 ++++++++++++++ .../src/watcher/tests/integration_test.rs | 200 +++++++++++++++ 12 files changed, 878 insertions(+), 9 deletions(-) create mode 100644 oxen-rust/src/watcher/src/cache_test.rs create mode 100644 oxen-rust/src/watcher/src/event_processor_test.rs create mode 100644 oxen-rust/src/watcher/src/lib.rs create mode 100644 oxen-rust/src/watcher/src/protocol_test.rs create mode 100644 oxen-rust/src/watcher/tests/integration_test.rs diff --git a/oxen-rust/src/watcher/Cargo.toml b/oxen-rust/src/watcher/Cargo.toml index 204329f72..d44078055 100644 --- a/oxen-rust/src/watcher/Cargo.toml +++ b/oxen-rust/src/watcher/Cargo.toml @@ -11,6 +11,10 @@ repository = "https://github.com/Oxen-AI/Oxen" name = "oxen-watcher" path = "src/main.rs" +[lib] +name = "oxen_watcher" +path = "src/lib.rs" + [dependencies] liboxen = { path = "../lib" } notify = "6.1" @@ -22,4 +26,8 @@ clap = { version = "4.4.2", features = ["cargo", "derive"] } log = "0.4" env_logger = "0.11" chrono = "0.4" -thiserror = "2.0" \ No newline at end of file +thiserror = "2.0" + +[dev-dependencies] +tempfile = "3.8" +tokio-test = "0.4" \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/cache.rs b/oxen-rust/src/watcher/src/cache.rs index 19d3784ae..bb190650b 100644 --- a/oxen-rust/src/watcher/src/cache.rs +++ b/oxen-rust/src/watcher/src/cache.rs @@ -7,9 +7,11 @@ use std::sync::Arc; use std::time::SystemTime; use tokio::sync::RwLock; +#[path = "cache_test.rs"] +mod cache_test; + /// Memory-only status cache for fast access pub struct StatusCache { - repo: LocalRepository, /// In-memory cache cache: Arc>, } @@ -27,7 +29,8 @@ struct MemoryCache { impl StatusCache { /// Create a new status cache for a repository pub fn new(repo_path: &Path) -> Result { - let repo = LocalRepository::from_dir(repo_path)?; + // Verify it's a valid repository + let _repo = LocalRepository::from_dir(repo_path)?; // Initialize memory cache let cache = Arc::new(RwLock::new(MemoryCache { @@ -39,7 +42,7 @@ impl StatusCache { last_update: SystemTime::now(), })); - Ok(Self { repo, cache }) + Ok(Self { cache }) } /// Get the current status, optionally filtered by paths @@ -87,6 +90,7 @@ impl StatusCache { } /// Update a file's status in the cache + #[allow(dead_code)] // Used in tests pub async fn update_file_status(&self, status: FileStatus) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; @@ -164,6 +168,7 @@ impl StatusCache { } /// Clear the entire cache + #[allow(dead_code)] // Used in tests pub async fn clear(&self) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; cache.modified.clear(); diff --git a/oxen-rust/src/watcher/src/cache_test.rs b/oxen-rust/src/watcher/src/cache_test.rs new file mode 100644 index 000000000..0efec1f22 --- /dev/null +++ b/oxen-rust/src/watcher/src/cache_test.rs @@ -0,0 +1,231 @@ +#[cfg(test)] +mod tests { + use crate::cache::StatusCache; + use crate::protocol::{FileStatus, FileStatusType}; + use std::path::PathBuf; + use std::time::SystemTime; + use tempfile::TempDir; + + async fn setup_test_cache() -> (StatusCache, TempDir) { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path(); + + // Create a fake .oxen directory + std::fs::create_dir_all(repo_path.join(".oxen")).unwrap(); + + // Initialize an empty oxen repo (minimal setup) + liboxen::repositories::init::init(repo_path).unwrap(); + + let cache = StatusCache::new(repo_path).unwrap(); + (cache, temp_dir) + } + + #[tokio::test] + async fn test_cache_new() { + let (_cache, _temp_dir) = setup_test_cache().await; + // Test passes if cache is created successfully + } + + #[tokio::test] + async fn test_empty_cache_status() { + let (cache, _temp_dir) = setup_test_cache().await; + + let status = cache.get_status(None).await; + assert!(status.modified.is_empty()); + assert!(status.added.is_empty()); + assert!(status.removed.is_empty()); + assert!(status.untracked.is_empty()); + assert!(!status.scan_complete); + } + + #[tokio::test] + async fn test_update_file_status() { + let (cache, _temp_dir) = setup_test_cache().await; + + let file_status = FileStatus { + path: PathBuf::from("test.txt"), + mtime: SystemTime::now(), + size: 100, + hash: Some("abc123".to_string()), + status: FileStatusType::Modified, + }; + + cache.update_file_status(file_status.clone()).await.unwrap(); + + let status = cache.get_status(None).await; + assert_eq!(status.modified.len(), 1); + assert_eq!(status.modified[0].path, PathBuf::from("test.txt")); + assert!(status.added.is_empty()); + assert!(status.removed.is_empty()); + assert!(status.untracked.is_empty()); + } + + #[tokio::test] + async fn test_batch_update() { + let (cache, _temp_dir) = setup_test_cache().await; + + let statuses = vec![ + FileStatus { + path: PathBuf::from("file1.txt"), + mtime: SystemTime::now(), + size: 100, + hash: None, + status: FileStatusType::Added, + }, + FileStatus { + path: PathBuf::from("file2.txt"), + mtime: SystemTime::now(), + size: 200, + hash: None, + status: FileStatusType::Modified, + }, + FileStatus { + path: PathBuf::from("file3.txt"), + mtime: SystemTime::now(), + size: 0, + hash: None, + status: FileStatusType::Removed, + }, + ]; + + cache.batch_update(statuses).await.unwrap(); + + let status = cache.get_status(None).await; + assert_eq!(status.added.len(), 1); + assert_eq!(status.modified.len(), 1); + assert_eq!(status.removed.len(), 1); + assert!(status.untracked.is_empty()); + } + + #[tokio::test] + async fn test_status_transitions() { + let (cache, _temp_dir) = setup_test_cache().await; + + let path = PathBuf::from("test.txt"); + + // Start as untracked + cache.update_file_status(FileStatus { + path: path.clone(), + mtime: SystemTime::now(), + size: 100, + hash: None, + status: FileStatusType::Untracked, + }).await.unwrap(); + + let status = cache.get_status(None).await; + assert_eq!(status.untracked.len(), 1); + + // Transition to added + cache.update_file_status(FileStatus { + path: path.clone(), + mtime: SystemTime::now(), + size: 100, + hash: Some("hash".to_string()), + status: FileStatusType::Added, + }).await.unwrap(); + + let status = cache.get_status(None).await; + assert_eq!(status.added.len(), 1); + assert_eq!(status.untracked.len(), 0); + + // Transition to modified + cache.update_file_status(FileStatus { + path: path.clone(), + mtime: SystemTime::now(), + size: 150, + hash: Some("newhash".to_string()), + status: FileStatusType::Modified, + }).await.unwrap(); + + let status = cache.get_status(None).await; + assert_eq!(status.modified.len(), 1); + assert_eq!(status.added.len(), 0); + } + + #[tokio::test] + async fn test_path_filtering() { + let (cache, _temp_dir) = setup_test_cache().await; + + let statuses = vec![ + FileStatus { + path: PathBuf::from("dir1/file1.txt"), + mtime: SystemTime::now(), + size: 100, + hash: None, + status: FileStatusType::Modified, + }, + FileStatus { + path: PathBuf::from("dir2/file2.txt"), + mtime: SystemTime::now(), + size: 200, + hash: None, + status: FileStatusType::Modified, + }, + ]; + + cache.batch_update(statuses).await.unwrap(); + + // Get all files + let status = cache.get_status(None).await; + assert_eq!(status.modified.len(), 2); + + // Filter by specific path + let filtered = cache.get_status(Some(vec![PathBuf::from("dir1/file1.txt")])).await; + assert_eq!(filtered.modified.len(), 1); + assert_eq!(filtered.modified[0].path, PathBuf::from("dir1/file1.txt")); + } + + #[tokio::test] + async fn test_scan_complete() { + let (cache, _temp_dir) = setup_test_cache().await; + + let status = cache.get_status(None).await; + assert!(!status.scan_complete); + + cache.mark_scan_complete().await.unwrap(); + + let status = cache.get_status(None).await; + assert!(status.scan_complete); + } + + #[tokio::test] + async fn test_clear_cache() { + let (cache, _temp_dir) = setup_test_cache().await; + + // Add some data + let statuses = vec![ + FileStatus { + path: PathBuf::from("file1.txt"), + mtime: SystemTime::now(), + size: 100, + hash: None, + status: FileStatusType::Added, + }, + FileStatus { + path: PathBuf::from("file2.txt"), + mtime: SystemTime::now(), + size: 200, + hash: None, + status: FileStatusType::Modified, + }, + ]; + + cache.batch_update(statuses).await.unwrap(); + cache.mark_scan_complete().await.unwrap(); + + // Verify data exists + let status = cache.get_status(None).await; + assert_eq!(status.added.len(), 1); + assert_eq!(status.modified.len(), 1); + assert!(status.scan_complete); + + // Clear cache + cache.clear().await.unwrap(); + + // Verify cache is empty + let status = cache.get_status(None).await; + assert!(status.added.is_empty()); + assert!(status.modified.is_empty()); + assert!(!status.scan_complete); + } +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/error.rs b/oxen-rust/src/watcher/src/error.rs index 9a6887771..4d2d7e863 100644 --- a/oxen-rust/src/watcher/src/error.rs +++ b/oxen-rust/src/watcher/src/error.rs @@ -21,6 +21,7 @@ pub enum WatcherError { RepositoryNotFound(String), #[error("Watcher already running")] + #[allow(dead_code)] // Will be used when we implement multiple watcher prevention AlreadyRunning, #[error("Failed to communicate with watcher: {0}")] diff --git a/oxen-rust/src/watcher/src/event_processor.rs b/oxen-rust/src/watcher/src/event_processor.rs index 1fa54775f..a46ba9276 100644 --- a/oxen-rust/src/watcher/src/event_processor.rs +++ b/oxen-rust/src/watcher/src/event_processor.rs @@ -9,6 +9,9 @@ use std::time::{Duration, Instant}; use tokio::sync::mpsc; use tokio::time; +#[path = "event_processor_test.rs"] +mod event_processor_test; + /// Processes filesystem events and updates the cache pub struct EventProcessor { cache: Arc, diff --git a/oxen-rust/src/watcher/src/event_processor_test.rs b/oxen-rust/src/watcher/src/event_processor_test.rs new file mode 100644 index 000000000..5eada457a --- /dev/null +++ b/oxen-rust/src/watcher/src/event_processor_test.rs @@ -0,0 +1,230 @@ +#[cfg(test)] +mod tests { + use crate::cache::StatusCache; + use crate::event_processor::EventProcessor; + use notify::{Event, EventKind}; + use std::sync::Arc; + use std::time::Duration; + use tempfile::TempDir; + use tokio::sync::mpsc; + use tokio::time; + + async fn setup_test_processor() -> (Arc, mpsc::Sender, TempDir) { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path(); + + // Create a fake .oxen directory + std::fs::create_dir_all(repo_path.join(".oxen")).unwrap(); + + // Initialize an empty oxen repo + liboxen::repositories::init::init(repo_path).unwrap(); + + let cache = Arc::new(StatusCache::new(repo_path).unwrap()); + let (event_tx, event_rx) = mpsc::channel::(100); + + let processor = EventProcessor::new(cache.clone()); + + // Start processor in background + tokio::spawn(async move { + processor.run(event_rx).await; + }); + + // Give processor time to start + time::sleep(Duration::from_millis(10)).await; + + (cache, event_tx, temp_dir) + } + + #[tokio::test] + async fn test_event_coalescing() { + let (cache, event_tx, temp_dir) = setup_test_processor().await; + + let test_file = temp_dir.path().join("test.txt"); + + // Send multiple events for the same file rapidly + for _ in 0..5 { + let event = Event { + kind: EventKind::Modify(notify::event::ModifyKind::Any), + paths: vec![test_file.clone()], + attrs: Default::default(), + }; + event_tx.send(event).await.unwrap(); + } + + // Wait for coalescing window + time::sleep(Duration::from_millis(150)).await; + + // Should only have one entry in cache + let status = cache.get_status(None).await; + assert!(status.modified.len() <= 1, "Events should be coalesced"); + } + + #[tokio::test] + async fn test_ignore_oxen_directory() { + let (cache, event_tx, temp_dir) = setup_test_processor().await; + + let oxen_file = temp_dir.path().join(".oxen").join("some_file.db"); + + let event = Event { + kind: EventKind::Create(notify::event::CreateKind::Any), + paths: vec![oxen_file], + attrs: Default::default(), + }; + + event_tx.send(event).await.unwrap(); + + // Wait for processing + time::sleep(Duration::from_millis(150)).await; + + // Should have no entries + let status = cache.get_status(None).await; + assert!(status.added.is_empty()); + assert!(status.untracked.is_empty()); + assert!(status.modified.is_empty()); + } + + #[tokio::test] + async fn test_ignore_directories() { + let (cache, event_tx, temp_dir) = setup_test_processor().await; + + let dir_path = temp_dir.path().join("some_directory"); + std::fs::create_dir_all(&dir_path).unwrap(); + + let event = Event { + kind: EventKind::Create(notify::event::CreateKind::Any), + paths: vec![dir_path], + attrs: Default::default(), + }; + + event_tx.send(event).await.unwrap(); + + // Wait for processing + time::sleep(Duration::from_millis(150)).await; + + // Should have no entries (directories are skipped) + let status = cache.get_status(None).await; + assert!(status.added.is_empty()); + assert!(status.untracked.is_empty()); + } + + #[tokio::test] + async fn test_batch_processing() { + let (cache, event_tx, temp_dir) = setup_test_processor().await; + + // Send events for multiple files + for i in 0..10 { + let file_path = temp_dir.path().join(format!("file{}.txt", i)); + // Create the file so metadata can be read + std::fs::write(&file_path, format!("content{}", i)).unwrap(); + + let event = Event { + kind: EventKind::Create(notify::event::CreateKind::Any), + paths: vec![file_path], + attrs: Default::default(), + }; + + event_tx.send(event).await.unwrap(); + } + + // Wait for batch processing + time::sleep(Duration::from_millis(200)).await; + + // Should have all files + let status = cache.get_status(None).await; + let total = status.added.len() + status.untracked.len() + status.modified.len(); + assert!(total > 0, "Should have processed some files"); + assert!(total <= 10, "Should not exceed number of files sent"); + } + + #[tokio::test] + async fn test_event_kinds_mapping() { + let (cache, event_tx, temp_dir) = setup_test_processor().await; + + // Test Create event + let create_file = temp_dir.path().join("created.txt"); + std::fs::write(&create_file, "content").unwrap(); + + event_tx + .send(Event { + kind: EventKind::Create(notify::event::CreateKind::Any), + paths: vec![create_file.clone()], + attrs: Default::default(), + }) + .await + .unwrap(); + + // Test Modify event + let modify_file = temp_dir.path().join("modified.txt"); + std::fs::write(&modify_file, "content").unwrap(); + + event_tx + .send(Event { + kind: EventKind::Modify(notify::event::ModifyKind::Any), + paths: vec![modify_file.clone()], + attrs: Default::default(), + }) + .await + .unwrap(); + + // Test Remove event + let remove_file = temp_dir.path().join("removed.txt"); + + event_tx + .send(Event { + kind: EventKind::Remove(notify::event::RemoveKind::Any), + paths: vec![remove_file.clone()], + attrs: Default::default(), + }) + .await + .unwrap(); + + // Wait for processing + time::sleep(Duration::from_millis(200)).await; + + let status = cache.get_status(None).await; + + // Should have entries in different categories + let total = status.added.len() + + status.untracked.len() + + status.modified.len() + + status.removed.len(); + assert!(total > 0, "Should have processed events"); + } + + #[tokio::test] + async fn test_skip_access_events() { + let (cache, event_tx, temp_dir) = setup_test_processor().await; + + let file = temp_dir.path().join("accessed.txt"); + std::fs::write(&file, "content").unwrap(); + + // Send Access event (should be ignored) + event_tx + .send(Event { + kind: EventKind::Access(notify::event::AccessKind::Any), + paths: vec![file.clone()], + attrs: Default::default(), + }) + .await + .unwrap(); + + // Send Other event (should be ignored) + event_tx + .send(Event { + kind: EventKind::Other, + paths: vec![file], + attrs: Default::default(), + }) + .await + .unwrap(); + + // Wait for processing + time::sleep(Duration::from_millis(150)).await; + + // Should have no entries + let status = cache.get_status(None).await; + assert!(status.added.is_empty()); + assert!(status.untracked.is_empty()); + assert!(status.modified.is_empty()); + } +} diff --git a/oxen-rust/src/watcher/src/lib.rs b/oxen-rust/src/watcher/src/lib.rs new file mode 100644 index 000000000..f2e8edb22 --- /dev/null +++ b/oxen-rust/src/watcher/src/lib.rs @@ -0,0 +1,10 @@ +pub mod cache; +pub mod cli; +pub mod error; +pub mod event_processor; +pub mod ipc; +pub mod monitor; +pub mod protocol; + +pub use error::WatcherError; +pub use protocol::{WatcherRequest, WatcherResponse}; \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/main.rs b/oxen-rust/src/watcher/src/main.rs index e4fc7beec..c90761c8b 100644 --- a/oxen-rust/src/watcher/src/main.rs +++ b/oxen-rust/src/watcher/src/main.rs @@ -8,7 +8,7 @@ mod protocol; use clap::Parser; use log::info; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use crate::cli::Args; use crate::error::WatcherError; @@ -77,7 +77,7 @@ async fn check_status(repo_path: PathBuf) -> Result<(), WatcherError> { Ok(()) } -async fn is_watcher_running(repo_path: &PathBuf) -> Result { +async fn is_watcher_running(repo_path: &Path) -> Result { let socket_path = repo_path.join(".oxen/watcher.sock"); // Try to ping the watcher diff --git a/oxen-rust/src/watcher/src/monitor.rs b/oxen-rust/src/watcher/src/monitor.rs index f5a9da507..2d2cf4d3f 100644 --- a/oxen-rust/src/watcher/src/monitor.rs +++ b/oxen-rust/src/watcher/src/monitor.rs @@ -122,7 +122,7 @@ async fn initial_scan(repo_path: PathBuf, cache: Arc) -> Result<(), // Convert Oxen status to our format for path in status.modified_files { - if let Ok(metadata) = std::fs::metadata(&repo_path.join(&path)) { + if let Ok(metadata) = std::fs::metadata(repo_path.join(path.clone())) { file_statuses.push(crate::protocol::FileStatus { path: path.clone(), mtime: metadata.modified().unwrap_or(std::time::SystemTime::now()), @@ -149,7 +149,7 @@ async fn initial_scan(repo_path: PathBuf, cache: Arc) -> Result<(), } }; - let (mtime, size) = if let Ok(metadata) = std::fs::metadata(&repo_path.join(&path)) + let (mtime, size) = if let Ok(metadata) = std::fs::metadata(repo_path.join(path.clone())) { ( metadata.modified().unwrap_or(std::time::SystemTime::now()), @@ -170,7 +170,7 @@ async fn initial_scan(repo_path: PathBuf, cache: Arc) -> Result<(), } for path in status.untracked_files { - if let Ok(metadata) = std::fs::metadata(&repo_path.join(&path)) { + if let Ok(metadata) = std::fs::metadata(repo_path.join(path.clone())) { file_statuses.push(crate::protocol::FileStatus { path: path.clone(), mtime: metadata.modified().unwrap_or(std::time::SystemTime::now()), diff --git a/oxen-rust/src/watcher/src/protocol.rs b/oxen-rust/src/watcher/src/protocol.rs index 9ab9978f1..5bf2f2bf2 100644 --- a/oxen-rust/src/watcher/src/protocol.rs +++ b/oxen-rust/src/watcher/src/protocol.rs @@ -2,6 +2,9 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; use std::time::SystemTime; +#[path = "protocol_test.rs"] +mod protocol_test; + /// Request messages sent from CLI to Watcher #[derive(Debug, Clone, Serialize, Deserialize)] pub enum WatcherRequest { diff --git a/oxen-rust/src/watcher/src/protocol_test.rs b/oxen-rust/src/watcher/src/protocol_test.rs new file mode 100644 index 000000000..4abcec8be --- /dev/null +++ b/oxen-rust/src/watcher/src/protocol_test.rs @@ -0,0 +1,178 @@ +#[cfg(test)] +mod tests { + use crate::protocol::*; + use std::path::PathBuf; + use std::time::SystemTime; + + #[test] + fn test_request_serialization() { + let request = WatcherRequest::GetStatus { + paths: Some(vec![PathBuf::from("/tmp/test")]), + }; + + let bytes = request.to_bytes().unwrap(); + let deserialized = WatcherRequest::from_bytes(&bytes).unwrap(); + + match deserialized { + WatcherRequest::GetStatus { paths } => { + assert!(paths.is_some()); + assert_eq!(paths.unwrap()[0], PathBuf::from("/tmp/test")); + } + _ => panic!("Wrong request type"), + } + } + + #[test] + fn test_response_serialization() { + let response = WatcherResponse::Summary { + modified: 5, + added: 3, + removed: 2, + untracked: 10, + last_updated: SystemTime::now(), + }; + + let bytes = response.to_bytes().unwrap(); + let deserialized = WatcherResponse::from_bytes(&bytes).unwrap(); + + match deserialized { + WatcherResponse::Summary { modified, added, removed, untracked, .. } => { + assert_eq!(modified, 5); + assert_eq!(added, 3); + assert_eq!(removed, 2); + assert_eq!(untracked, 10); + } + _ => panic!("Wrong response type"), + } + } + + #[test] + fn test_status_result_serialization() { + let status_result = StatusResult { + modified: vec![FileStatus { + path: PathBuf::from("modified.txt"), + mtime: SystemTime::now(), + size: 100, + hash: Some("hash1".to_string()), + status: FileStatusType::Modified, + }], + added: vec![FileStatus { + path: PathBuf::from("added.txt"), + mtime: SystemTime::now(), + size: 200, + hash: None, + status: FileStatusType::Added, + }], + removed: vec![PathBuf::from("removed.txt")], + untracked: vec![PathBuf::from("untracked.txt")], + scan_complete: true, + }; + + let response = WatcherResponse::Status(status_result); + let bytes = response.to_bytes().unwrap(); + let deserialized = WatcherResponse::from_bytes(&bytes).unwrap(); + + match deserialized { + WatcherResponse::Status(result) => { + assert_eq!(result.modified.len(), 1); + assert_eq!(result.added.len(), 1); + assert_eq!(result.removed.len(), 1); + assert_eq!(result.untracked.len(), 1); + assert!(result.scan_complete); + + assert_eq!(result.modified[0].path, PathBuf::from("modified.txt")); + assert_eq!(result.added[0].path, PathBuf::from("added.txt")); + assert_eq!(result.removed[0], PathBuf::from("removed.txt")); + assert_eq!(result.untracked[0], PathBuf::from("untracked.txt")); + } + _ => panic!("Wrong response type"), + } + } + + #[test] + fn test_all_request_types() { + let requests = vec![ + WatcherRequest::GetStatus { paths: None }, + WatcherRequest::GetSummary, + WatcherRequest::Refresh { + paths: vec![PathBuf::from("/tmp")], + }, + WatcherRequest::Shutdown, + WatcherRequest::Ping, + ]; + + for request in requests { + let bytes = request.to_bytes().unwrap(); + let deserialized = WatcherRequest::from_bytes(&bytes).unwrap(); + + // Just verify it deserializes correctly + match (&request, &deserialized) { + (WatcherRequest::Ping, WatcherRequest::Ping) => {} + (WatcherRequest::Shutdown, WatcherRequest::Shutdown) => {} + (WatcherRequest::GetSummary, WatcherRequest::GetSummary) => {} + _ => {} // Other cases would need deeper comparison + } + } + } + + #[test] + fn test_file_status_type_equality() { + assert_eq!(FileStatusType::Modified, FileStatusType::Modified); + assert_eq!(FileStatusType::Added, FileStatusType::Added); + assert_eq!(FileStatusType::Removed, FileStatusType::Removed); + assert_eq!(FileStatusType::Untracked, FileStatusType::Untracked); + + assert_ne!(FileStatusType::Modified, FileStatusType::Added); + assert_ne!(FileStatusType::Added, FileStatusType::Removed); + } + + #[test] + fn test_error_response() { + let response = WatcherResponse::Error("Something went wrong".to_string()); + let bytes = response.to_bytes().unwrap(); + let deserialized = WatcherResponse::from_bytes(&bytes).unwrap(); + + match deserialized { + WatcherResponse::Error(msg) => { + assert_eq!(msg, "Something went wrong"); + } + _ => panic!("Wrong response type"), + } + } + + #[test] + fn test_large_payload() { + // Test with many files + let mut modified = Vec::new(); + for i in 0..1000 { + modified.push(FileStatus { + path: PathBuf::from(format!("file{}.txt", i)), + mtime: SystemTime::now(), + size: i as u64, + hash: Some(format!("hash{}", i)), + status: FileStatusType::Modified, + }); + } + + let status_result = StatusResult { + modified, + added: vec![], + removed: vec![], + untracked: vec![], + scan_complete: true, + }; + + let response = WatcherResponse::Status(status_result); + let bytes = response.to_bytes().unwrap(); + let deserialized = WatcherResponse::from_bytes(&bytes).unwrap(); + + match deserialized { + WatcherResponse::Status(result) => { + assert_eq!(result.modified.len(), 1000); + assert_eq!(result.modified[0].path, PathBuf::from("file0.txt")); + assert_eq!(result.modified[999].path, PathBuf::from("file999.txt")); + } + _ => panic!("Wrong response type"), + } + } +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/tests/integration_test.rs b/oxen-rust/src/watcher/tests/integration_test.rs new file mode 100644 index 000000000..b33dc03b8 --- /dev/null +++ b/oxen-rust/src/watcher/tests/integration_test.rs @@ -0,0 +1,200 @@ +use std::path::PathBuf; +use std::time::Duration; +use tempfile::TempDir; +use tokio::process::Command; +use tokio::time; + +/// Helper to get the watcher binary path +fn get_watcher_path() -> PathBuf { + // The test binary is typically in target/{profile}/deps/ + // while the actual binary is in target/{profile}/ + let mut path = std::env::current_exe().unwrap(); + + // Go up from deps directory if we're in it + path.pop(); // Remove test binary name + if path.ends_with("deps") { + path.pop(); // Remove "deps" + } + + // Now we should be in target/{profile}/ + let watcher_path = path.join("oxen-watcher"); + + if !watcher_path.exists() { + panic!( + "oxen-watcher binary not found at {:?}. Run 'cargo build --package oxen-watcher --bin oxen-watcher' first", + watcher_path + ); + } + + watcher_path +} + +#[tokio::test] +#[ignore] // Run with: cargo test --package oxen-watcher -- --ignored +async fn test_watcher_lifecycle() { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path(); + + // Initialize an oxen repository + liboxen::repositories::init::init(repo_path).unwrap(); + + let watcher_path = get_watcher_path(); + + // Start the watcher + let mut start_cmd = Command::new(&watcher_path) + .arg("start") + .arg("--repo") + .arg(repo_path) + .spawn() + .expect("Failed to start watcher"); + + // Give it time to start + time::sleep(Duration::from_secs(2)).await; + + // Check status + let status_output = Command::new(&watcher_path) + .arg("status") + .arg("--repo") + .arg(repo_path) + .output() + .await + .expect("Failed to check status"); + + let status_str = String::from_utf8_lossy(&status_output.stdout); + assert!(status_str.contains("running"), "Watcher should be running"); + + // Stop the watcher + let stop_output = Command::new(&watcher_path) + .arg("stop") + .arg("--repo") + .arg(repo_path) + .output() + .await + .expect("Failed to stop watcher"); + + assert!(stop_output.status.success(), "Stop command should succeed"); + + // Give it time to stop + time::sleep(Duration::from_secs(1)).await; + + // Check status again + let status_output2 = Command::new(&watcher_path) + .arg("status") + .arg("--repo") + .arg(repo_path) + .output() + .await + .expect("Failed to check status"); + + let status_str2 = String::from_utf8_lossy(&status_output2.stdout); + assert!( + status_str2.contains("not running"), + "Watcher should not be running" + ); + + // Clean up - ensure process is terminated + let _ = start_cmd.kill().await; +} + +#[tokio::test] +#[ignore] +async fn test_watcher_file_detection() { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path(); + + // Initialize an oxen repository + liboxen::repositories::init::init(repo_path).unwrap(); + + let watcher_path = get_watcher_path(); + + // Start the watcher + let mut watcher_process = Command::new(&watcher_path) + .arg("start") + .arg("--repo") + .arg(repo_path) + .spawn() + .expect("Failed to start watcher"); + + // Give it time to start and do initial scan + time::sleep(Duration::from_secs(3)).await; + + // Create a new file + let test_file = repo_path.join("test.txt"); + std::fs::write(&test_file, "test content").unwrap(); + + // Give watcher time to detect the change + time::sleep(Duration::from_secs(1)).await; + + // TODO: Once CLI integration is complete (try_watcher_status() in status.rs), + // we should test that `oxen status` actually detects the new file via the watcher. + // For now we just verify the watcher is running. + + let status_output = Command::new(&watcher_path) + .arg("status") + .arg("--repo") + .arg(repo_path) + .output() + .await + .expect("Failed to check status"); + + assert!(status_output.status.success()); + + // Stop the watcher + Command::new(&watcher_path) + .arg("stop") + .arg("--repo") + .arg(repo_path) + .output() + .await + .expect("Failed to stop watcher"); + + // Clean up + let _ = watcher_process.kill().await; +} + +#[tokio::test] +#[ignore] +async fn test_multiple_watcher_prevention() { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path(); + + // Initialize an oxen repository + liboxen::repositories::init::init(repo_path).unwrap(); + + let watcher_path = get_watcher_path(); + + // Start the first watcher + let mut first_watcher = Command::new(&watcher_path) + .arg("start") + .arg("--repo") + .arg(repo_path) + .spawn() + .expect("Failed to start first watcher"); + + // Give it time to start + time::sleep(Duration::from_secs(2)).await; + + // Try to start a second watcher (should not create a new one) + let second_output = Command::new(&watcher_path) + .arg("start") + .arg("--repo") + .arg(repo_path) + .output() + .await + .expect("Failed to run second start command"); + + // The second start should succeed but not create a new watcher + assert!(second_output.status.success()); + + // Stop the watcher + Command::new(&watcher_path) + .arg("stop") + .arg("--repo") + .arg(repo_path) + .output() + .await + .expect("Failed to stop watcher"); + + // Clean up + let _ = first_watcher.kill().await; +} From 86ecbca55988abec14b5c23c8772b229f956a003 Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Wed, 13 Aug 2025 17:03:14 -0700 Subject: [PATCH 03/11] WIP integrate watcher with status cmd [ci skip] --- oxen-rust/Cargo.toml | 5 + oxen-rust/src/cli/src/cmd/status.rs | 17 +- oxen-rust/src/lib/src/core/v_latest.rs | 1 + oxen-rust/src/lib/src/core/v_latest/status.rs | 446 ++++++++++++++++++ .../lib/src/core/v_latest/watcher_client.rs | 216 +++++++++ oxen-rust/src/watcher/src/ipc.rs | 68 +-- oxen-rust/src/watcher/src/monitor.rs | 37 +- 7 files changed, 740 insertions(+), 50 deletions(-) create mode 100644 oxen-rust/src/lib/src/core/v_latest/watcher_client.rs diff --git a/oxen-rust/Cargo.toml b/oxen-rust/Cargo.toml index 08f24ed56..1447eb83c 100644 --- a/oxen-rust/Cargo.toml +++ b/oxen-rust/Cargo.toml @@ -174,6 +174,11 @@ name = "oxen-server" path = "src/server/src/main.rs" bench = false +# [[bin]] +# name = "oxen-watcher" +# path = "src/watcher/src/main.rs" +# bench = false + [package.metadata.docs.rs] default-target = "x86_64-unknown-linux-gnu" features = ["duckdb"] # this is without "duckdb/bundled" diff --git a/oxen-rust/src/cli/src/cmd/status.rs b/oxen-rust/src/cli/src/cmd/status.rs index 1922b9974..daa0833e3 100644 --- a/oxen-rust/src/cli/src/cmd/status.rs +++ b/oxen-rust/src/cli/src/cmd/status.rs @@ -54,6 +54,12 @@ impl RunCmd for StatusCmd { .help("If present, does not truncate the output of status at all.") .action(clap::ArgAction::SetTrue), ) + .arg( + Arg::new("no-cache") + .long("no-cache") + .help("Skip filesystem cache and perform full scan") + .action(clap::ArgAction::SetTrue), + ) .arg( Arg::new("paths") .num_args(0..) @@ -74,6 +80,7 @@ impl RunCmd for StatusCmd { .parse::() .expect("limit must be a valid integer."); let print_all = args.get_flag("print_all"); + let no_cache = args.get_flag("no-cache"); let repository = LocalRepository::from_current_dir()?; check_repo_migration_needed(&repository)?; @@ -93,7 +100,15 @@ impl RunCmd for StatusCmd { }; log::debug!("status opts: {:?}", opts); - let repo_status = repositories::status::status_from_opts(&repository, &opts)?; + // Use the watcher-enabled status function unless --no-cache is specified + let repo_status = if no_cache { + log::debug!("Using direct scan (--no-cache specified)"); + repositories::status::status_from_opts(&repository, &opts)? + } else { + // Try to use watcher cache by default + log::debug!("Attempting to use watcher cache"); + liboxen::core::v_latest::status::status_with_cache(&repository, &opts, true).await? + }; if let Some(current_branch) = repositories::branches::current_branch(&repository)? { println!( diff --git a/oxen-rust/src/lib/src/core/v_latest.rs b/oxen-rust/src/lib/src/core/v_latest.rs index 7ca8c9ad2..0d356a26c 100644 --- a/oxen-rust/src/lib/src/core/v_latest.rs +++ b/oxen-rust/src/lib/src/core/v_latest.rs @@ -23,6 +23,7 @@ pub mod revisions; pub mod rm; pub mod stats; pub mod status; +pub mod watcher_client; pub mod workspaces; pub use add::add; diff --git a/oxen-rust/src/lib/src/core/v_latest/status.rs b/oxen-rust/src/lib/src/core/v_latest/status.rs index 287c7296a..6e561a462 100644 --- a/oxen-rust/src/lib/src/core/v_latest/status.rs +++ b/oxen-rust/src/lib/src/core/v_latest/status.rs @@ -25,6 +25,7 @@ use std::str; use std::time::Duration; use crate::core::v_latest::index::CommitMerkleTree; +use crate::core::v_latest::watcher_client::{WatcherClient, WatcherStatus}; use crate::model::merkle_tree::node::EMerkleTreeNode; use crate::model::merkle_tree::node::MerkleTreeNode; @@ -43,6 +44,157 @@ pub fn status_from_dir( status_from_opts(repo, &opts) } +/// Status with optional watcher cache support +pub async fn status_with_cache( + repo: &LocalRepository, + opts: &StagedDataOpts, + use_cache: bool, +) -> Result { + // If cache is enabled, try to use the watcher + if use_cache { + log::debug!("Attempting to use watcher cache for status"); + + // Try to connect to watcher + if let Some(client) = WatcherClient::connect(repo).await { + log::info!("Connected to watcher, getting status"); + + // Try to get status from watcher + match client.get_status().await { + Ok(watcher_status) => { + log::debug!("Got status from watcher, merging with staged data"); + log::info!("Got status from watcher, merging with staged data"); + return merge_watcher_with_staged(repo, opts, watcher_status); + } + Err(e) => { + log::warn!("Failed to get status from watcher: {}", e); + // Fall through to regular status + } + } + } else { + log::warn!("Could not connect to watcher"); + } + } else { + log::debug!("Cache disabled, using direct scan"); + } + + // Fallback to regular status + status_from_opts(repo, opts) +} + +/// Merge watcher data with staged database and other sources +fn merge_watcher_with_staged( + repo: &LocalRepository, + opts: &StagedDataOpts, + watcher: WatcherStatus, +) -> Result { + log::debug!("Merging watcher data with staged database"); + + let mut staged_data = StagedData::empty(); + + // Apply oxenignore filtering + let oxenignore = oxenignore::create(repo); + + // Use watcher data for filesystem state + // Apply path filtering if paths were specified + if !opts.paths.is_empty() && opts.paths[0] != repo.path { + // Filter watcher results to only include specified paths + let requested_paths: HashSet = opts + .paths + .iter() + .map(|p| util::fs::path_relative_to_dir(p, &repo.path)) + .filter_map(Result::ok) + .collect(); + + staged_data.untracked_files = watcher + .untracked + .into_iter() + .filter(|p| requested_paths.iter().any(|req| p.starts_with(req))) + .filter(|p| !oxenignore::is_ignored(p, &oxenignore, false)) + .collect(); + + staged_data.modified_files = watcher + .modified + .into_iter() + .filter(|p| requested_paths.iter().any(|req| p.starts_with(req))) + .filter(|p| !oxenignore::is_ignored(p, &oxenignore, false)) + .collect(); + + staged_data.removed_files = watcher + .removed + .into_iter() + .filter(|p| requested_paths.iter().any(|req| p.starts_with(req))) + .filter(|p| !oxenignore::is_ignored(p, &oxenignore, false)) + .collect(); + } else { + // Use all watcher data with oxenignore filtering + staged_data.untracked_files = watcher + .untracked + .into_iter() + .filter(|p| !oxenignore::is_ignored(p, &oxenignore, false)) + .collect(); + staged_data.modified_files = watcher + .modified + .into_iter() + .filter(|p| !oxenignore::is_ignored(p, &oxenignore, false)) + .collect(); + staged_data.removed_files = watcher + .removed + .into_iter() + .filter(|p| !oxenignore::is_ignored(p, &oxenignore, false)) + .collect(); + } + + // Extract untracked directories from untracked files + let mut untracked_dirs: HashMap = HashMap::new(); + for file in &staged_data.untracked_files { + if let Some(parent) = file.parent() { + if !parent.as_os_str().is_empty() { + *untracked_dirs.entry(parent.to_path_buf()).or_insert(0) += 1; + } + } + } + staged_data.untracked_dirs = untracked_dirs.into_iter().collect(); + + // Now read staged data from the database + let staged_db_maybe = open_staged_db(repo)?; + + if let Some(staged_db) = staged_db_maybe { + log::debug!("Reading staged entries from database"); + + let read_progress = ProgressBar::new_spinner(); + read_progress.set_style(ProgressStyle::default_spinner()); + read_progress.enable_steady_tick(Duration::from_millis(100)); + + // Read staged entries based on paths + let mut dir_entries = HashMap::new(); + if !opts.paths.is_empty() { + for path in &opts.paths { + let (sub_dir_entries, _) = + read_staged_entries_below_path(repo, &staged_db, path, &read_progress)?; + dir_entries.extend(sub_dir_entries); + } + } else { + let (entries, _) = read_staged_entries(repo, &staged_db, &read_progress)?; + dir_entries = entries; + } + + read_progress.finish_and_clear(); + + // Process staged entries and build staged data + status_from_dir_entries(&mut staged_data, dir_entries)?; + } + + // Find merge conflicts + let conflicts = repositories::merge::list_conflicts(repo)?; + for conflict in conflicts { + staged_data + .merge_conflicts + .push(conflict.to_entry_merge_conflict()); + } + + Ok(staged_data) +} + pub fn status_from_opts( repo: &LocalRepository, opts: &StagedDataOpts, @@ -1107,3 +1259,297 @@ fn maybe_get_dir_children( Ok(None) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::test; + use std::collections::HashSet; + use std::path::PathBuf; + use std::time::SystemTime; + + #[tokio::test] + async fn test_merge_watcher_with_staged_empty_watcher() -> Result<(), OxenError> { + test::run_empty_local_repo_test(|repo| { + // Create empty watcher status + let watcher_status = WatcherStatus { + untracked: HashSet::new(), + modified: HashSet::new(), + removed: HashSet::new(), + scan_complete: true, + last_updated: SystemTime::now(), + }; + + let opts = StagedDataOpts::default(); + + // Run merge function + let result = merge_watcher_with_staged(&repo, &opts, watcher_status)?; + + // Verify result has empty collections + assert_eq!(result.untracked_files.len(), 0); + assert_eq!(result.modified_files.len(), 0); + assert_eq!(result.removed_files.len(), 0); + assert_eq!(result.untracked_dirs.len(), 0); + + Ok(()) + }) + } + + #[tokio::test] + async fn test_merge_watcher_with_untracked_files() -> Result<(), OxenError> { + test::run_empty_local_repo_test(|repo| { + // Create watcher status with untracked files + let mut untracked = HashSet::new(); + untracked.insert(PathBuf::from("file1.txt")); + untracked.insert(PathBuf::from("dir/file2.txt")); + untracked.insert(PathBuf::from("dir/subdir/file3.txt")); + + let watcher_status = WatcherStatus { + untracked: untracked.clone(), + modified: HashSet::new(), + removed: HashSet::new(), + scan_complete: true, + last_updated: SystemTime::now(), + }; + + let opts = StagedDataOpts::default(); + + // Run merge function + let result = merge_watcher_with_staged(&repo, &opts, watcher_status)?; + + // Verify untracked files are present + assert_eq!(result.untracked_files.len(), 3); + assert!(result.untracked_files.contains(&PathBuf::from("file1.txt"))); + assert!(result + .untracked_files + .contains(&PathBuf::from("dir/file2.txt"))); + assert!(result + .untracked_files + .contains(&PathBuf::from("dir/subdir/file3.txt"))); + + // Verify untracked directories are extracted + assert_eq!(result.untracked_dirs.len(), 2); + let dir_map: HashMap = result.untracked_dirs.into_iter().collect(); + assert_eq!(dir_map.get(&PathBuf::from("dir")), Some(&1)); + assert_eq!(dir_map.get(&PathBuf::from("dir/subdir")), Some(&1)); + + Ok(()) + }) + } + + #[tokio::test] + async fn test_merge_watcher_with_modified_and_removed() -> Result<(), OxenError> { + test::run_empty_local_repo_test(|repo| { + // Create watcher status with modified and removed files + let mut modified = HashSet::new(); + modified.insert(PathBuf::from("modified1.txt")); + modified.insert(PathBuf::from("modified2.txt")); + + let mut removed = HashSet::new(); + removed.insert(PathBuf::from("removed1.txt")); + removed.insert(PathBuf::from("dir/removed2.txt")); + + let watcher_status = WatcherStatus { + untracked: HashSet::new(), + modified, + removed, + scan_complete: true, + last_updated: SystemTime::now(), + }; + + let opts = StagedDataOpts::default(); + + // Run merge function + let result = merge_watcher_with_staged(&repo, &opts, watcher_status)?; + + // Verify modified files + assert_eq!(result.modified_files.len(), 2); + assert!(result + .modified_files + .contains(&PathBuf::from("modified1.txt"))); + assert!(result + .modified_files + .contains(&PathBuf::from("modified2.txt"))); + + // Verify removed files + assert_eq!(result.removed_files.len(), 2); + assert!(result + .removed_files + .contains(&PathBuf::from("removed1.txt"))); + assert!(result + .removed_files + .contains(&PathBuf::from("dir/removed2.txt"))); + + Ok(()) + }) + } + + #[tokio::test] + async fn test_merge_watcher_with_path_filtering() -> Result<(), OxenError> { + test::run_empty_local_repo_test(|repo| { + // Create watcher status with files in different directories + let mut untracked = HashSet::new(); + untracked.insert(PathBuf::from("dir1/file1.txt")); + untracked.insert(PathBuf::from("dir2/file2.txt")); + untracked.insert(PathBuf::from("dir3/file3.txt")); + + let mut modified = HashSet::new(); + modified.insert(PathBuf::from("dir1/modified.txt")); + modified.insert(PathBuf::from("dir2/modified.txt")); + + let watcher_status = WatcherStatus { + untracked, + modified, + removed: HashSet::new(), + scan_complete: true, + last_updated: SystemTime::now(), + }; + + // Create opts with specific path filter + let opts = StagedDataOpts { + paths: vec![repo.path.join("dir1")], + ..StagedDataOpts::default() + }; + + // Run merge function + let result = merge_watcher_with_staged(&repo, &opts, watcher_status)?; + + // Verify only dir1 files are included + assert_eq!(result.untracked_files.len(), 1); + assert!(result + .untracked_files + .contains(&PathBuf::from("dir1/file1.txt"))); + + assert_eq!(result.modified_files.len(), 1); + assert!(result + .modified_files + .contains(&PathBuf::from("dir1/modified.txt"))); + + Ok(()) + }) + } + + #[tokio::test] + async fn test_merge_watcher_with_oxenignore() -> Result<(), OxenError> { + test::run_empty_local_repo_test(|repo| { + // Create .oxenignore file + let oxenignore_path = repo.path.join(".oxenignore"); + test::write_txt_file_to_path(&oxenignore_path, "*.log\ntemp/\n")?; + + // Create watcher status with ignored and non-ignored files + let mut untracked = HashSet::new(); + untracked.insert(PathBuf::from("file.txt")); + untracked.insert(PathBuf::from("debug.log")); // Should be ignored + untracked.insert(PathBuf::from("temp/file.txt")); // Should be ignored + untracked.insert(PathBuf::from("data/file.txt")); + + let watcher_status = WatcherStatus { + untracked, + modified: HashSet::new(), + removed: HashSet::new(), + scan_complete: true, + last_updated: SystemTime::now(), + }; + + let opts = StagedDataOpts::default(); + + // Run merge function + let result = merge_watcher_with_staged(&repo, &opts, watcher_status)?; + + // Verify ignored files are filtered out + assert_eq!(result.untracked_files.len(), 2); + assert!(result.untracked_files.contains(&PathBuf::from("file.txt"))); + assert!(result + .untracked_files + .contains(&PathBuf::from("data/file.txt"))); + assert!(!result.untracked_files.contains(&PathBuf::from("debug.log"))); + assert!(!result + .untracked_files + .contains(&PathBuf::from("temp/file.txt"))); + + Ok(()) + }) + } + + #[tokio::test] + async fn test_merge_watcher_extracts_directories() -> Result<(), OxenError> { + test::run_empty_local_repo_test(|repo| { + // Create watcher status with files in nested directories + let mut untracked = HashSet::new(); + untracked.insert(PathBuf::from("a/file1.txt")); + untracked.insert(PathBuf::from("a/file2.txt")); + untracked.insert(PathBuf::from("a/b/file3.txt")); + untracked.insert(PathBuf::from("a/b/file4.txt")); + untracked.insert(PathBuf::from("a/b/c/file5.txt")); + untracked.insert(PathBuf::from("d/file6.txt")); + + let watcher_status = WatcherStatus { + untracked, + modified: HashSet::new(), + removed: HashSet::new(), + scan_complete: true, + last_updated: SystemTime::now(), + }; + + let opts = StagedDataOpts::default(); + + // Run merge function + let result = merge_watcher_with_staged(&repo, &opts, watcher_status)?; + + // Verify all files are present + assert_eq!(result.untracked_files.len(), 6); + + // Verify directories are correctly extracted with counts + let dir_map: HashMap = result.untracked_dirs.into_iter().collect(); + assert_eq!(dir_map.get(&PathBuf::from("a")), Some(&2)); // 2 files directly in 'a' + assert_eq!(dir_map.get(&PathBuf::from("a/b")), Some(&2)); // 2 files directly in 'a/b' + assert_eq!(dir_map.get(&PathBuf::from("a/b/c")), Some(&1)); // 1 file in 'a/b/c' + assert_eq!(dir_map.get(&PathBuf::from("d")), Some(&1)); // 1 file in 'd' + + Ok(()) + }) + } + + #[tokio::test] + async fn test_merge_watcher_all_file_types() -> Result<(), OxenError> { + test::run_empty_local_repo_test(|repo| { + // Create watcher status with all types of changes + let mut untracked = HashSet::new(); + untracked.insert(PathBuf::from("new1.txt")); + untracked.insert(PathBuf::from("new2.txt")); + + let mut modified = HashSet::new(); + modified.insert(PathBuf::from("changed1.txt")); + modified.insert(PathBuf::from("changed2.txt")); + + let mut removed = HashSet::new(); + removed.insert(PathBuf::from("deleted1.txt")); + removed.insert(PathBuf::from("deleted2.txt")); + + let watcher_status = WatcherStatus { + untracked: untracked.clone(), + modified: modified.clone(), + removed: removed.clone(), + scan_complete: true, + last_updated: SystemTime::now(), + }; + + let opts = StagedDataOpts::default(); + + // Run merge function + let result = merge_watcher_with_staged(&repo, &opts, watcher_status)?; + + // Verify all file types are present + // Convert Vec to HashSet for comparison + let result_untracked: HashSet = result.untracked_files.into_iter().collect(); + assert_eq!(result_untracked, untracked); + assert_eq!(result.modified_files, modified); + assert_eq!(result.removed_files, removed); + + // Verify we have merge conflicts (should be empty for test repo) + assert_eq!(result.merge_conflicts.len(), 0); + + Ok(()) + }) + } +} diff --git a/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs b/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs new file mode 100644 index 000000000..20619ecb5 --- /dev/null +++ b/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs @@ -0,0 +1,216 @@ +use crate::error::OxenError; +use crate::model::LocalRepository; +use std::collections::HashSet; +use std::path::PathBuf; +use std::time::SystemTime; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::net::UnixStream; + +/// Client for communicating with the filesystem watcher daemon +pub struct WatcherClient { + socket_path: PathBuf, +} + +/// Status data received from the watcher +#[derive(Debug, Clone)] +pub struct WatcherStatus { + pub untracked: HashSet, + pub modified: HashSet, + pub removed: HashSet, + pub scan_complete: bool, + pub last_updated: SystemTime, +} + +impl WatcherClient { + /// Try to connect to the watcher daemon for a repository + pub async fn connect(repo: &LocalRepository) -> Option { + let socket_path = repo.path.join(".oxen/watcher.sock"); + + // Check if socket exists + if !socket_path.exists() { + log::debug!("Watcher socket does not exist at {:?}", socket_path); + return None; + } + + // Return client with socket path - actual connection happens in get_status/ping + log::debug!("Watcher socket found at {:?}", socket_path); + Some(Self { socket_path }) + } + + /// Get the current status from the watcher + pub async fn get_status(&self) -> Result { + // Connect to the socket + let mut stream = UnixStream::connect(&self.socket_path) + .await + .map_err(|e| OxenError::basic_str(&format!("Failed to connect to watcher: {}", e)))?; + + // Create request using the watcher protocol + // We need to import the protocol types from the watcher crate + let request = WatcherRequest::GetStatus { paths: None }; + let request_bytes = rmp_serde::to_vec(&request) + .map_err(|e| OxenError::basic_str(&format!("Failed to serialize request: {}", e)))?; + + // Send request (length-prefixed) + let len = request_bytes.len() as u32; + stream + .write_all(&len.to_le_bytes()) + .await + .map_err(|e| OxenError::basic_str(&format!("Failed to write request length: {}", e)))?; + stream + .write_all(&request_bytes) + .await + .map_err(|e| OxenError::basic_str(&format!("Failed to write request: {}", e)))?; + stream + .flush() + .await + .map_err(|e| OxenError::basic_str(&format!("Failed to flush stream: {}", e)))?; + + // Read response length + let mut len_buf = [0u8; 4]; + stream + .read_exact(&mut len_buf) + .await + .map_err(|e| OxenError::basic_str(&format!("Failed to read response length: {}", e)))?; + let response_len = u32::from_le_bytes(len_buf) as usize; + + // Sanity check response size + if response_len > 100 * 1024 * 1024 { + // 100MB max + return Err(OxenError::basic_str(&format!( + "Response too large: {} bytes", + response_len + ))); + } + + // Read response body + let mut response_buf = vec![0u8; response_len]; + stream + .read_exact(&mut response_buf) + .await + .map_err(|e| OxenError::basic_str(&format!("Failed to read response: {}", e)))?; + + // Deserialize response + let response: WatcherResponse = rmp_serde::from_slice(&response_buf) + .map_err(|e| OxenError::basic_str(&format!("Failed to deserialize response: {}", e)))?; + + // Gracefully shutdown the connection + let _ = stream.shutdown().await; + + // Convert response to WatcherStatus + match response { + WatcherResponse::Status(status_result) => Ok(WatcherStatus { + untracked: status_result.untracked.into_iter().collect(), + modified: status_result.modified.into_iter().map(|f| f.path).collect(), + removed: status_result.removed.into_iter().collect(), + scan_complete: status_result.scan_complete, + last_updated: SystemTime::now(), + }), + WatcherResponse::Error(msg) => { + Err(OxenError::basic_str(&format!("Watcher error: {}", msg))) + } + _ => Err(OxenError::basic_str("Unexpected response from watcher")), + } + } + + /// Check if the watcher is responsive + pub async fn ping(&self) -> bool { + match UnixStream::connect(&self.socket_path).await { + Ok(mut stream) => { + // Send ping request + let request = WatcherRequest::Ping; + if let Ok(request_bytes) = rmp_serde::to_vec(&request) { + let len = request_bytes.len() as u32; + if stream.write_all(&len.to_le_bytes()).await.is_ok() + && stream.write_all(&request_bytes).await.is_ok() + && stream.flush().await.is_ok() + { + // Try to read response + let mut len_buf = [0u8; 4]; + if stream.read_exact(&mut len_buf).await.is_ok() { + let response_len = u32::from_le_bytes(len_buf) as usize; + if response_len < 1000 { + // Ping response should be small + let mut response_buf = vec![0u8; response_len]; + if stream.read_exact(&mut response_buf).await.is_ok() { + // Gracefully shutdown the connection before checking response + let _ = stream.shutdown().await; + if let Ok(response) = + rmp_serde::from_slice::(&response_buf) + { + matches!(response, WatcherResponse::Ok) + } else { + false + } + } else { + false + } + } else { + false + } + } else { + false + } + } else { + false + } + } else { + false + } + } + Err(_) => false, + } + } +} + +// We need to define the protocol types here temporarily +// In a real implementation, these would be imported from the watcher crate +// or defined in a shared protocol module + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +enum WatcherRequest { + GetStatus { paths: Option> }, + GetSummary, + Refresh { paths: Vec }, + Shutdown, + Ping, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +enum WatcherResponse { + Status(StatusResult), + Summary { + modified: usize, + added: usize, + removed: usize, + untracked: usize, + last_updated: SystemTime, + }, + Ok, + Error(String), +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +struct StatusResult { + pub modified: Vec, + pub added: Vec, + pub removed: Vec, + pub untracked: Vec, + pub scan_complete: bool, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +struct FileStatus { + pub path: PathBuf, + pub mtime: SystemTime, + pub size: u64, + pub hash: Option, + pub status: FileStatusType, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +enum FileStatusType { + Modified, + Added, + Removed, + Untracked, +} diff --git a/oxen-rust/src/watcher/src/ipc.rs b/oxen-rust/src/watcher/src/ipc.rs index 5c44b4684..004d1284a 100644 --- a/oxen-rust/src/watcher/src/ipc.rs +++ b/oxen-rust/src/watcher/src/ipc.rs @@ -18,24 +18,24 @@ impl IpcServer { pub fn new(repo_path: PathBuf, cache: Arc) -> Self { Self { repo_path, cache } } - + /// Run the IPC server pub async fn run(self) -> Result<(), WatcherError> { let socket_path = self.repo_path.join(".oxen/watcher.sock"); - + // Remove old socket if it exists if socket_path.exists() { std::fs::remove_file(&socket_path)?; } - + // Create the Unix socket listener let listener = UnixListener::bind(&socket_path)?; info!("IPC server listening on {}", socket_path.display()); - + // Track last request time for idle timeout let idle_timeout = Duration::from_secs(600); // 10 minutes let mut last_request = Instant::now(); - + loop { // Accept connections with timeout check tokio::select! { @@ -43,7 +43,7 @@ impl IpcServer { match result { Ok((stream, _)) => { last_request = Instant::now(); - + // Handle client in a separate task let cache = self.cache.clone(); tokio::spawn(async move { @@ -57,7 +57,7 @@ impl IpcServer { } } } - + // Check for idle timeout _ = tokio::time::sleep(Duration::from_secs(60)) => { if last_request.elapsed() > idle_timeout { @@ -67,7 +67,7 @@ impl IpcServer { } } } - + Ok(()) } } @@ -77,32 +77,33 @@ async fn handle_client( mut stream: UnixStream, cache: Arc, ) -> Result<(), WatcherError> { + info!("Handling incoming client connection"); // Read message length (4 bytes, little-endian) let mut len_buf = [0u8; 4]; stream.read_exact(&mut len_buf).await?; let len = u32::from_le_bytes(len_buf) as usize; - + // Sanity check message size (max 10MB) if len > 10 * 1024 * 1024 { error!("Message too large: {} bytes", len); return Err(WatcherError::Communication("Message too large".to_string())); } - + // Read message body let mut msg_buf = vec![0u8; len]; stream.read_exact(&mut msg_buf).await?; - + // Deserialize request let request = WatcherRequest::from_bytes(&msg_buf)?; - debug!("Received request: {:?}", request); - + info!("Received request: {:?}", request); + // Process request let response = match request { WatcherRequest::GetStatus { paths } => { let status = cache.get_status(paths).await; WatcherResponse::Status(status) } - + WatcherRequest::GetSummary => { let status = cache.get_status(None).await; WatcherResponse::Summary { @@ -113,31 +114,30 @@ async fn handle_client( last_updated: std::time::SystemTime::now(), } } - + WatcherRequest::Refresh { paths } => { // TODO: Implement forced refresh debug!("Refresh requested for {:?}", paths); WatcherResponse::Ok } - + WatcherRequest::Shutdown => { info!("Shutdown requested via IPC"); // Send response before shutting down let response = WatcherResponse::Ok; send_response(&mut stream, &response).await?; - + // Exit the process std::process::exit(0); } - - WatcherRequest::Ping => { - WatcherResponse::Ok - } + + WatcherRequest::Ping => WatcherResponse::Ok, }; - + // Send response send_response(&mut stream, &response).await?; - + info!("Sent response"); + Ok(()) } @@ -148,15 +148,15 @@ async fn send_response( ) -> Result<(), WatcherError> { // Serialize response let msg = response.to_bytes()?; - + // Write length prefix let len = msg.len() as u32; stream.write_all(&len.to_le_bytes()).await?; - + // Write message stream.write_all(&msg).await?; stream.flush().await?; - + Ok(()) } @@ -169,29 +169,29 @@ pub async fn send_request( let mut stream = UnixStream::connect(socket_path) .await .map_err(|e| WatcherError::Communication(format!("Failed to connect: {}", e)))?; - + // Serialize request let msg = request.to_bytes()?; - + // Send length prefix let len = msg.len() as u32; stream.write_all(&len.to_le_bytes()).await?; - + // Send message stream.write_all(&msg).await?; stream.flush().await?; - + // Read response length let mut len_buf = [0u8; 4]; stream.read_exact(&mut len_buf).await?; let len = u32::from_le_bytes(len_buf) as usize; - + // Read response body let mut msg_buf = vec![0u8; len]; stream.read_exact(&mut msg_buf).await?; - + // Deserialize response let response = WatcherResponse::from_bytes(&msg_buf)?; - + Ok(response) -} \ No newline at end of file +} diff --git a/oxen-rust/src/watcher/src/monitor.rs b/oxen-rust/src/watcher/src/monitor.rs index 2d2cf4d3f..3a08bce5a 100644 --- a/oxen-rust/src/watcher/src/monitor.rs +++ b/oxen-rust/src/watcher/src/monitor.rs @@ -1,14 +1,17 @@ -use crate::cache::StatusCache; -use crate::error::WatcherError; -use crate::event_processor::EventProcessor; -use crate::ipc::IpcServer; -use liboxen::model::LocalRepository; use log::{error, info, warn}; use notify::{Event, RecursiveMode, Watcher}; use std::path::PathBuf; use std::sync::Arc; use tokio::sync::mpsc; +use liboxen::core; +use liboxen::model::LocalRepository; + +use crate::cache::StatusCache; +use crate::error::WatcherError; +use crate::event_processor::EventProcessor; +use crate::ipc::IpcServer; + /// Main filesystem watcher that coordinates all components pub struct FileSystemWatcher { repo_path: PathBuf, @@ -149,16 +152,16 @@ async fn initial_scan(repo_path: PathBuf, cache: Arc) -> Result<(), } }; - let (mtime, size) = if let Ok(metadata) = std::fs::metadata(repo_path.join(path.clone())) - { - ( - metadata.modified().unwrap_or(std::time::SystemTime::now()), - metadata.len(), - ) - } else { - // File might not exist if it was removed - (std::time::SystemTime::now(), 0) - }; + let (mtime, size) = + if let Ok(metadata) = std::fs::metadata(repo_path.join(path.clone())) { + ( + metadata.modified().unwrap_or(std::time::SystemTime::now()), + metadata.len(), + ) + } else { + // File might not exist if it was removed + (std::time::SystemTime::now(), 0) + }; file_statuses.push(crate::protocol::FileStatus { path: path.clone(), @@ -194,5 +197,9 @@ async fn initial_scan(repo_path: PathBuf, cache: Arc) -> Result<(), } } + // Remove cached ref DB connection so it doesn't block other connections + // TODO: update the ref_manager with the option to NOT cache the connection, + // similar to how we configure the merkle tree node cache + core::refs::remove_from_cache(repo.path)?; Ok(()) } From 69487e6d8338113c678433b1ebafd3c8f26b9e23 Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Thu, 21 Aug 2025 11:31:58 -0600 Subject: [PATCH 04/11] Watcher reports paths relative to repo root --- oxen-rust/src/watcher/src/event_processor.rs | 16 ++++- .../src/watcher/src/event_processor_test.rs | 2 +- oxen-rust/src/watcher/src/monitor.rs | 2 +- .../src/watcher/tests/integration_test.rs | 66 +++++++++++++++++++ 4 files changed, 81 insertions(+), 5 deletions(-) diff --git a/oxen-rust/src/watcher/src/event_processor.rs b/oxen-rust/src/watcher/src/event_processor.rs index a46ba9276..be8639af9 100644 --- a/oxen-rust/src/watcher/src/event_processor.rs +++ b/oxen-rust/src/watcher/src/event_processor.rs @@ -15,11 +15,12 @@ mod event_processor_test; /// Processes filesystem events and updates the cache pub struct EventProcessor { cache: Arc, + repo_path: PathBuf, } impl EventProcessor { - pub fn new(cache: Arc) -> Self { - Self { cache } + pub fn new(cache: Arc, repo_path: PathBuf) -> Self { + Self { cache, repo_path } } /// Run the event processing loop @@ -127,8 +128,17 @@ impl EventProcessor { continue; }; + // Convert absolute path to relative path + let relative_path = match path.strip_prefix(&self.repo_path) { + Ok(rel) => rel.to_path_buf(), + Err(_) => { + // Path is not within repo, skip it + continue; + } + }; + updates.push(FileStatus { - path: path.clone(), + path: relative_path, mtime, size, hash: None, // Will be computed later if needed diff --git a/oxen-rust/src/watcher/src/event_processor_test.rs b/oxen-rust/src/watcher/src/event_processor_test.rs index 5eada457a..31b300a21 100644 --- a/oxen-rust/src/watcher/src/event_processor_test.rs +++ b/oxen-rust/src/watcher/src/event_processor_test.rs @@ -22,7 +22,7 @@ mod tests { let cache = Arc::new(StatusCache::new(repo_path).unwrap()); let (event_tx, event_rx) = mpsc::channel::(100); - let processor = EventProcessor::new(cache.clone()); + let processor = EventProcessor::new(cache.clone(), repo_path.to_path_buf()); // Start processor in background tokio::spawn(async move { diff --git a/oxen-rust/src/watcher/src/monitor.rs b/oxen-rust/src/watcher/src/monitor.rs index 3a08bce5a..3f2d9fbaf 100644 --- a/oxen-rust/src/watcher/src/monitor.rs +++ b/oxen-rust/src/watcher/src/monitor.rs @@ -63,7 +63,7 @@ impl FileSystemWatcher { info!("Watching directory: {}", self.repo_path.display()); // Start the event processor - let processor = EventProcessor::new(self.cache.clone()); + let processor = EventProcessor::new(self.cache.clone(), self.repo_path.clone()); let processor_handle = tokio::spawn(async move { processor.run(event_rx).await }); // Start the IPC server diff --git a/oxen-rust/src/watcher/tests/integration_test.rs b/oxen-rust/src/watcher/tests/integration_test.rs index b33dc03b8..eba4a9e87 100644 --- a/oxen-rust/src/watcher/tests/integration_test.rs +++ b/oxen-rust/src/watcher/tests/integration_test.rs @@ -3,6 +3,8 @@ use std::time::Duration; use tempfile::TempDir; use tokio::process::Command; use tokio::time; +use oxen_watcher::ipc::send_request; +use oxen_watcher::protocol::{WatcherRequest, WatcherResponse}; /// Helper to get the watcher binary path fn get_watcher_path() -> PathBuf { @@ -198,3 +200,67 @@ async fn test_multiple_watcher_prevention() { // Clean up let _ = first_watcher.kill().await; } + +#[tokio::test] +#[ignore] +async fn test_watcher_reports_relative_paths() { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path(); + + // Initialize an oxen repository + liboxen::repositories::init::init(repo_path).unwrap(); + + let watcher_path = get_watcher_path(); + + // Start the watcher + let mut watcher_process = Command::new(&watcher_path) + .arg("start") + .arg("--repo") + .arg(repo_path) + .spawn() + .expect("Failed to start watcher"); + + // Give it time to start and do initial scan + time::sleep(Duration::from_secs(3)).await; + + // Create test files in different directories + std::fs::write(repo_path.join("root_file.txt"), "root content").unwrap(); + std::fs::create_dir_all(repo_path.join("subdir")).unwrap(); + std::fs::write(repo_path.join("subdir/nested_file.txt"), "nested content").unwrap(); + + // Give watcher time to detect the changes + time::sleep(Duration::from_millis(500)).await; + + // Query the watcher via IPC + let socket_path = repo_path.join(".oxen/watcher.sock"); + let request = WatcherRequest::GetStatus { paths: None }; + let response = send_request(&socket_path, request).await.expect("Failed to send request"); + + // Verify the response contains relative paths + if let WatcherResponse::Status(status) = response { + // Check that all paths are relative + for path in &status.untracked { + assert!(!path.is_absolute(), "Path should be relative, got: {:?}", path); + assert!(!path.starts_with("/"), "Path should not start with /, got: {:?}", path); + } + + // Verify specific files are present with correct relative paths + let paths: Vec<_> = status.untracked.iter().map(|p| p.to_string_lossy().to_string()).collect(); + assert!(paths.contains(&"root_file.txt".to_string()), "Should contain root_file.txt"); + assert!(paths.contains(&"subdir/nested_file.txt".to_string()), "Should contain subdir/nested_file.txt"); + } else { + panic!("Expected Status response, got: {:?}", response); + } + + // Stop the watcher + Command::new(&watcher_path) + .arg("stop") + .arg("--repo") + .arg(repo_path) + .output() + .await + .expect("Failed to stop watcher"); + + // Clean up + let _ = watcher_process.kill().await; +} From 7295ebb240c5058d0993ba0b61cd75e8a1d14eda Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Fri, 22 Aug 2025 10:24:33 -0600 Subject: [PATCH 05/11] Improve debugging output --- oxen-rust/src/watcher/src/cache.rs | 67 ++++++++++++++++----------- oxen-rust/src/watcher/src/ipc.rs | 1 + oxen-rust/src/watcher/src/protocol.rs | 60 +++++++++++++++++++++--- 3 files changed, 95 insertions(+), 33 deletions(-) diff --git a/oxen-rust/src/watcher/src/cache.rs b/oxen-rust/src/watcher/src/cache.rs index bb190650b..a4647226c 100644 --- a/oxen-rust/src/watcher/src/cache.rs +++ b/oxen-rust/src/watcher/src/cache.rs @@ -1,12 +1,15 @@ -use crate::error::WatcherError; -use crate::protocol::{FileStatus, FileStatusType, StatusResult}; -use liboxen::model::LocalRepository; use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::SystemTime; + +use liboxen::model::LocalRepository; +use log::info; use tokio::sync::RwLock; +use crate::error::WatcherError; +use crate::protocol::{FileStatus, FileStatusType, StatusResult}; + #[path = "cache_test.rs"] mod cache_test; @@ -31,7 +34,7 @@ impl StatusCache { pub fn new(repo_path: &Path) -> Result { // Verify it's a valid repository let _repo = LocalRepository::from_dir(repo_path)?; - + // Initialize memory cache let cache = Arc::new(RwLock::new(MemoryCache { modified: HashMap::new(), @@ -41,32 +44,40 @@ impl StatusCache { scan_complete: false, last_update: SystemTime::now(), })); - + Ok(Self { cache }) } - + /// Get the current status, optionally filtered by paths pub async fn get_status(&self, paths: Option>) -> StatusResult { let cache = self.cache.read().await; - + // Filter by paths if requested let (modified, added, removed, untracked) = if let Some(paths) = paths { let path_set: std::collections::HashSet<_> = paths.iter().collect(); - + ( - cache.modified.values() + cache + .modified + .values() .filter(|f| path_set.contains(&f.path)) .cloned() .collect(), - cache.added.values() + cache + .added + .values() .filter(|f| path_set.contains(&f.path)) .cloned() .collect(), - cache.removed.keys() + cache + .removed + .keys() .filter(|p| path_set.contains(p)) .cloned() .collect(), - cache.untracked.keys() + cache + .untracked + .keys() .filter(|p| path_set.contains(p)) .cloned() .collect(), @@ -79,7 +90,11 @@ impl StatusCache { cache.untracked.keys().cloned().collect(), ) }; - + + if !cache.scan_complete { + info!("Scan not complete"); + } + StatusResult { modified, added, @@ -88,12 +103,12 @@ impl StatusCache { scan_complete: cache.scan_complete, } } - + /// Update a file's status in the cache - #[allow(dead_code)] // Used in tests + #[allow(dead_code)] // Used in tests pub async fn update_file_status(&self, status: FileStatus) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; - + // Update memory cache match status.status { FileStatusType::Modified => { @@ -118,16 +133,16 @@ impl StatusCache { cache.added.remove(&status.path); } } - + cache.last_update = SystemTime::now(); - + Ok(()) } - + /// Batch update multiple file statuses pub async fn batch_update(&self, statuses: Vec) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; - + for status in statuses { // Update memory cache match status.status { @@ -154,21 +169,21 @@ impl StatusCache { } } } - + cache.last_update = SystemTime::now(); - + Ok(()) } - + /// Mark the initial scan as complete pub async fn mark_scan_complete(&self) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; cache.scan_complete = true; Ok(()) } - + /// Clear the entire cache - #[allow(dead_code)] // Used in tests + #[allow(dead_code)] // Used in tests pub async fn clear(&self) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; cache.modified.clear(); @@ -179,4 +194,4 @@ impl StatusCache { cache.last_update = SystemTime::now(); Ok(()) } -} \ No newline at end of file +} diff --git a/oxen-rust/src/watcher/src/ipc.rs b/oxen-rust/src/watcher/src/ipc.rs index 004d1284a..003514d7d 100644 --- a/oxen-rust/src/watcher/src/ipc.rs +++ b/oxen-rust/src/watcher/src/ipc.rs @@ -101,6 +101,7 @@ async fn handle_client( let response = match request { WatcherRequest::GetStatus { paths } => { let status = cache.get_status(paths).await; + debug!("Status response:\n{}", status); WatcherResponse::Status(status) } diff --git a/oxen-rust/src/watcher/src/protocol.rs b/oxen-rust/src/watcher/src/protocol.rs index 5bf2f2bf2..3501b3be1 100644 --- a/oxen-rust/src/watcher/src/protocol.rs +++ b/oxen-rust/src/watcher/src/protocol.rs @@ -1,4 +1,5 @@ use serde::{Deserialize, Serialize}; +use std::fmt::{Debug, Display}; use std::path::PathBuf; use std::time::SystemTime; @@ -16,9 +17,7 @@ pub enum WatcherRequest { /// Get a summary of changes (just counts) GetSummary, /// Force a refresh/rescan of specific paths - Refresh { - paths: Vec, - }, + Refresh { paths: Vec }, /// Shutdown the watcher daemon Shutdown, /// Health check ping @@ -55,8 +54,23 @@ pub struct StatusResult { pub scan_complete: bool, } +impl Display for StatusResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.modified.iter().for_each(|status| { + writeln!(f, "{}", status).unwrap(); + }); + self.removed.iter().for_each(|path| { + writeln!(f, "[Removed]\t{:?}", path).unwrap(); + }); + self.untracked.iter().for_each(|path| { + writeln!(f, "[Untracked]\t{:?}", path).unwrap(); + }); + Ok(()) + } +} + /// Status of a single file -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize)] pub struct FileStatus { pub path: PathBuf, pub mtime: SystemTime, @@ -65,6 +79,38 @@ pub struct FileStatus { pub status: FileStatusType, } +impl Debug for FileStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("FileStatus") + .field("path", &self.path) + .field( + "mtime", + &self + .mtime + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis(), + ) + .field("status", &self.status) + .finish() + } +} + +impl Display for FileStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[{:?}] {} {}", + self.status, + self.mtime + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis(), + self.path.display(), + ) + } +} + /// Type of file status #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum FileStatusType { @@ -79,7 +125,7 @@ impl WatcherRequest { pub fn to_bytes(&self) -> Result, rmp_serde::encode::Error> { rmp_serde::to_vec(self) } - + /// Deserialize request from MessagePack bytes pub fn from_bytes(bytes: &[u8]) -> Result { rmp_serde::from_slice(bytes) @@ -91,9 +137,9 @@ impl WatcherResponse { pub fn to_bytes(&self) -> Result, rmp_serde::encode::Error> { rmp_serde::to_vec(self) } - + /// Deserialize response from MessagePack bytes pub fn from_bytes(bytes: &[u8]) -> Result { rmp_serde::from_slice(bytes) } -} \ No newline at end of file +} From 2998b8a96deb8130312e82e65e2fbf433469ac67 Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Tue, 26 Aug 2025 15:30:26 -0600 Subject: [PATCH 06/11] Fix building watcher with entire workspace --- oxen-rust/Cargo.lock | 107 +++++++++++++++++++++++++++++++ oxen-rust/Cargo.toml | 10 +-- oxen-rust/src/watcher/Cargo.toml | 4 +- 3 files changed, 115 insertions(+), 6 deletions(-) diff --git a/oxen-rust/Cargo.lock b/oxen-rust/Cargo.lock index f33e5cc79..7ce4cb8ad 100644 --- a/oxen-rust/Cargo.lock +++ b/oxen-rust/Cargo.lock @@ -66,6 +66,7 @@ dependencies = [ "minus", "mockito", "mp4", + "notify", "num_cpus", "os_path", "par-stream", @@ -97,6 +98,7 @@ dependencies = [ "sysinfo", "tar", "tempfile", + "thiserror 2.0.12", "time", "tokio", "tokio-stream", @@ -2609,6 +2611,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + [[package]] name = "funty" version = "2.0.0" @@ -3342,6 +3353,26 @@ dependencies = [ "cfb", ] +[[package]] +name = "inotify" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3" +dependencies = [ + "bitflags 2.9.1", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + [[package]] name = "inout" version = "0.1.4" @@ -3534,6 +3565,26 @@ dependencies = [ "rayon", ] +[[package]] +name = "kqueue" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac30106d7dce88daf4a3fcb4879ea939476d5074a9b7ddd0fb97fa4bed5596a" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + [[package]] name = "kv-log-macro" version = "1.0.7" @@ -4163,6 +4214,30 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" +[[package]] +name = "notify" +version = "8.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3" +dependencies = [ + "bitflags 2.9.1", + "fsevent-sys", + "inotify", + "kqueue", + "libc", + "log", + "mio 1.0.4", + "notify-types", + "walkdir", + "windows-sys 0.60.2", +] + +[[package]] +name = "notify-types" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d" + [[package]] name = "now" version = "0.1.3" @@ -4484,6 +4559,25 @@ dependencies = [ "uuid", ] +[[package]] +name = "oxen-watcher" +version = "0.36.3" +dependencies = [ + "chrono", + "clap", + "env_logger", + "liboxen", + "log", + "notify", + "rmp-serde", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.12", + "tokio", + "tokio-test", +] + [[package]] name = "par-stream" version = "0.10.2" @@ -6972,6 +7066,19 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-test" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" +dependencies = [ + "async-stream", + "bytes", + "futures-core", + "tokio", + "tokio-stream", +] + [[package]] name = "tokio-util" version = "0.7.16" diff --git a/oxen-rust/Cargo.toml b/oxen-rust/Cargo.toml index 1447eb83c..0a2c1e9fe 100644 --- a/oxen-rust/Cargo.toml +++ b/oxen-rust/Cargo.toml @@ -88,6 +88,7 @@ mockito = "1.1.0" mp4 = "0.14.0" mime = "0.3.17" minus = { version = "5.4.0", features = ["static_output", "search"] } +notify = "8.2" num_cpus = "1.16.0" parking_lot = "0.12.1" par-stream = { version = "0.10.2", features = ["runtime-tokio"] } @@ -132,6 +133,7 @@ sql_query_builder = { version = "2.1.0", features = ["postgresql"] } sysinfo = "0.33.0" tar = "0.4.44" tempfile = "3.8.0" +thiserror = "2.0" time = { version = "0.3.28", features = ["serde"] } tokio = { version = "1.32.0", features = ["full"] } tokio-stream = "0.1.17" @@ -174,10 +176,10 @@ name = "oxen-server" path = "src/server/src/main.rs" bench = false -# [[bin]] -# name = "oxen-watcher" -# path = "src/watcher/src/main.rs" -# bench = false +[[bin]] +name = "oxen-watcher" +path = "src/watcher/src/main.rs" +bench = false [package.metadata.docs.rs] default-target = "x86_64-unknown-linux-gnu" diff --git a/oxen-rust/src/watcher/Cargo.toml b/oxen-rust/src/watcher/Cargo.toml index d44078055..212ff1c9f 100644 --- a/oxen-rust/src/watcher/Cargo.toml +++ b/oxen-rust/src/watcher/Cargo.toml @@ -17,7 +17,7 @@ path = "src/lib.rs" [dependencies] liboxen = { path = "../lib" } -notify = "6.1" +notify = "8.2" tokio = { version = "1", features = ["full"] } rmp-serde = "1.3.0" serde = { version = "1.0", features = ["derive"] } @@ -30,4 +30,4 @@ thiserror = "2.0" [dev-dependencies] tempfile = "3.8" -tokio-test = "0.4" \ No newline at end of file +tokio-test = "0.4" From f10eea0fff0842a72c715a043abbb6a7325fef7e Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Thu, 28 Aug 2025 18:23:57 -0600 Subject: [PATCH 07/11] Simplify watcher status types to keep repo logic in liboxen --- oxen-rust/src/lib/src/core/v_latest/status.rs | 18 +-- .../lib/src/core/v_latest/watcher_client.rs | 97 +++++++++--- oxen-rust/src/watcher/src/cache.rs | 82 ++++------ oxen-rust/src/watcher/src/cache_test.rs | 48 +++--- oxen-rust/src/watcher/src/cli.rs | 2 +- oxen-rust/src/watcher/src/event_processor.rs | 48 +++--- .../src/watcher/src/event_processor_test.rs | 17 +- oxen-rust/src/watcher/src/ipc.rs | 3 +- oxen-rust/src/watcher/src/lib.rs | 2 +- oxen-rust/src/watcher/src/main.rs | 12 +- oxen-rust/src/watcher/src/monitor.rs | 80 ++++------ oxen-rust/src/watcher/src/protocol.rs | 146 +----------------- oxen-rust/src/watcher/src/protocol_test.rs | 40 ++--- .../src/watcher/tests/integration_test.rs | 10 +- 14 files changed, 240 insertions(+), 365 deletions(-) diff --git a/oxen-rust/src/lib/src/core/v_latest/status.rs b/oxen-rust/src/lib/src/core/v_latest/status.rs index 6e561a462..0806b55f0 100644 --- a/oxen-rust/src/lib/src/core/v_latest/status.rs +++ b/oxen-rust/src/lib/src/core/v_latest/status.rs @@ -106,7 +106,7 @@ fn merge_watcher_with_staged( .collect(); staged_data.untracked_files = watcher - .untracked + .created .into_iter() .filter(|p| requested_paths.iter().any(|req| p.starts_with(req))) .filter(|p| !oxenignore::is_ignored(p, &oxenignore, false)) @@ -128,7 +128,7 @@ fn merge_watcher_with_staged( } else { // Use all watcher data with oxenignore filtering staged_data.untracked_files = watcher - .untracked + .created .into_iter() .filter(|p| !oxenignore::is_ignored(p, &oxenignore, false)) .collect(); @@ -1273,7 +1273,7 @@ mod tests { test::run_empty_local_repo_test(|repo| { // Create empty watcher status let watcher_status = WatcherStatus { - untracked: HashSet::new(), + created: HashSet::new(), modified: HashSet::new(), removed: HashSet::new(), scan_complete: true, @@ -1305,7 +1305,7 @@ mod tests { untracked.insert(PathBuf::from("dir/subdir/file3.txt")); let watcher_status = WatcherStatus { - untracked: untracked.clone(), + created: untracked.clone(), modified: HashSet::new(), removed: HashSet::new(), scan_complete: true, @@ -1350,7 +1350,7 @@ mod tests { removed.insert(PathBuf::from("dir/removed2.txt")); let watcher_status = WatcherStatus { - untracked: HashSet::new(), + created: HashSet::new(), modified, removed, scan_complete: true, @@ -1398,7 +1398,7 @@ mod tests { modified.insert(PathBuf::from("dir2/modified.txt")); let watcher_status = WatcherStatus { - untracked, + created: untracked, modified, removed: HashSet::new(), scan_complete: true, @@ -1444,7 +1444,7 @@ mod tests { untracked.insert(PathBuf::from("data/file.txt")); let watcher_status = WatcherStatus { - untracked, + created: untracked, modified: HashSet::new(), removed: HashSet::new(), scan_complete: true, @@ -1484,7 +1484,7 @@ mod tests { untracked.insert(PathBuf::from("d/file6.txt")); let watcher_status = WatcherStatus { - untracked, + created: untracked, modified: HashSet::new(), removed: HashSet::new(), scan_complete: true, @@ -1527,7 +1527,7 @@ mod tests { removed.insert(PathBuf::from("deleted2.txt")); let watcher_status = WatcherStatus { - untracked: untracked.clone(), + created: untracked.clone(), modified: modified.clone(), removed: removed.clone(), scan_complete: true, diff --git a/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs b/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs index 20619ecb5..24cda5e87 100644 --- a/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs +++ b/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs @@ -1,5 +1,6 @@ use crate::error::OxenError; use crate::model::LocalRepository; +use serde::{Deserialize, Serialize}; use std::collections::HashSet; use std::path::PathBuf; use std::time::SystemTime; @@ -14,10 +15,15 @@ pub struct WatcherClient { /// Status data received from the watcher #[derive(Debug, Clone)] pub struct WatcherStatus { - pub untracked: HashSet, + /// Files created since watcher started (includes untracked files from initial scan) + pub created: HashSet, + /// Files modified since watcher started pub modified: HashSet, + /// Files removed since watcher started pub removed: HashSet, + /// Whether the initial scan is complete pub scan_complete: bool, + /// Last update time pub last_updated: SystemTime, } @@ -92,14 +98,14 @@ impl WatcherClient { // Deserialize response let response: WatcherResponse = rmp_serde::from_slice(&response_buf) .map_err(|e| OxenError::basic_str(&format!("Failed to deserialize response: {}", e)))?; - + // Gracefully shutdown the connection let _ = stream.shutdown().await; // Convert response to WatcherStatus match response { WatcherResponse::Status(status_result) => Ok(WatcherStatus { - untracked: status_result.untracked.into_iter().collect(), + created: status_result.created.into_iter().map(|f| f.path).collect(), modified: status_result.modified.into_iter().map(|f| f.path).collect(), removed: status_result.removed.into_iter().collect(), scan_complete: status_result.scan_complete, @@ -162,12 +168,12 @@ impl WatcherClient { } } -// We need to define the protocol types here temporarily -// In a real implementation, these would be imported from the watcher crate -// or defined in a shared protocol module +// +// Protocol types shared between liboxen and oxen-watcher +// -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -enum WatcherRequest { +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum WatcherRequest { GetStatus { paths: Option> }, GetSummary, Refresh { paths: Vec }, @@ -175,31 +181,64 @@ enum WatcherRequest { Ping, } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -enum WatcherResponse { +impl WatcherRequest { + pub fn to_bytes(&self) -> Result, rmp_serde::encode::Error> { + rmp_serde::to_vec(self) + } + + pub fn from_bytes(bytes: &[u8]) -> Result { + rmp_serde::from_slice(bytes) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum WatcherResponse { Status(StatusResult), Summary { + created: usize, modified: usize, - added: usize, removed: usize, - untracked: usize, last_updated: SystemTime, }, Ok, Error(String), } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -struct StatusResult { +impl WatcherResponse { + pub fn to_bytes(&self) -> Result, rmp_serde::encode::Error> { + rmp_serde::to_vec(self) + } + + pub fn from_bytes(bytes: &[u8]) -> Result { + rmp_serde::from_slice(bytes) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StatusResult { + pub created: Vec, pub modified: Vec, - pub added: Vec, pub removed: Vec, - pub untracked: Vec, pub scan_complete: bool, } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -struct FileStatus { +impl std::fmt::Display for StatusResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.created.iter().for_each(|status| { + writeln!(f, "[Created]\t{}", status.path.display()).unwrap(); + }); + self.modified.iter().for_each(|status| { + writeln!(f, "[Modified]\t{}", status.path.display()).unwrap(); + }); + self.removed.iter().for_each(|path| { + writeln!(f, "[Removed]\t{}", path.display()).unwrap(); + }); + Ok(()) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileStatus { pub path: PathBuf, pub mtime: SystemTime, pub size: u64, @@ -207,10 +246,24 @@ struct FileStatus { pub status: FileStatusType, } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -enum FileStatusType { +impl std::fmt::Display for FileStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[{:?}] {} {}", + self.status, + self.mtime + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis(), + self.path.display(), + ) + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum FileStatusType { + Created, Modified, - Added, Removed, - Untracked, } diff --git a/oxen-rust/src/watcher/src/cache.rs b/oxen-rust/src/watcher/src/cache.rs index a4647226c..0feaacfee 100644 --- a/oxen-rust/src/watcher/src/cache.rs +++ b/oxen-rust/src/watcher/src/cache.rs @@ -21,11 +21,15 @@ pub struct StatusCache { /// In-memory cache data structure struct MemoryCache { + /// Files created since watcher started + created: HashMap, + /// Files modified since watcher started modified: HashMap, - added: HashMap, + /// Files removed since watcher started removed: HashMap, - untracked: HashMap, + /// Whether initial scan is complete scan_complete: bool, + /// Last update time last_update: SystemTime, } @@ -37,10 +41,9 @@ impl StatusCache { // Initialize memory cache let cache = Arc::new(RwLock::new(MemoryCache { + created: HashMap::new(), modified: HashMap::new(), - added: HashMap::new(), removed: HashMap::new(), - untracked: HashMap::new(), scan_complete: false, last_update: SystemTime::now(), })); @@ -53,18 +56,18 @@ impl StatusCache { let cache = self.cache.read().await; // Filter by paths if requested - let (modified, added, removed, untracked) = if let Some(paths) = paths { + let (created, modified, removed) = if let Some(paths) = paths { let path_set: std::collections::HashSet<_> = paths.iter().collect(); ( cache - .modified + .created .values() .filter(|f| path_set.contains(&f.path)) .cloned() .collect(), cache - .added + .modified .values() .filter(|f| path_set.contains(&f.path)) .cloned() @@ -75,19 +78,12 @@ impl StatusCache { .filter(|p| path_set.contains(p)) .cloned() .collect(), - cache - .untracked - .keys() - .filter(|p| path_set.contains(p)) - .cloned() - .collect(), ) } else { ( + cache.created.values().cloned().collect(), cache.modified.values().cloned().collect(), - cache.added.values().cloned().collect(), cache.removed.keys().cloned().collect(), - cache.untracked.keys().cloned().collect(), ) }; @@ -96,10 +92,9 @@ impl StatusCache { } StatusResult { + created, modified, - added, removed, - untracked, scan_complete: cache.scan_complete, } } @@ -109,28 +104,25 @@ impl StatusCache { pub async fn update_file_status(&self, status: FileStatus) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; - // Update memory cache + // Update memory cache based on event type match status.status { + FileStatusType::Created => { + cache.created.insert(status.path.clone(), status.clone()); + // If a file is created, it's no longer modified or removed + cache.modified.remove(&status.path); + cache.removed.remove(&status.path); + } FileStatusType::Modified => { cache.modified.insert(status.path.clone(), status.clone()); - cache.added.remove(&status.path); - cache.untracked.remove(&status.path); - } - FileStatusType::Added => { - cache.added.insert(status.path.clone(), status.clone()); - cache.modified.remove(&status.path); - cache.untracked.remove(&status.path); + // A modified file might have been previously created, keep that status + // But it's definitely not removed + cache.removed.remove(&status.path); } FileStatusType::Removed => { cache.removed.insert(status.path.clone(), status.clone()); + // If removed, clear from created and modified + cache.created.remove(&status.path); cache.modified.remove(&status.path); - cache.added.remove(&status.path); - cache.untracked.remove(&status.path); - } - FileStatusType::Untracked => { - cache.untracked.insert(status.path.clone(), status.clone()); - cache.modified.remove(&status.path); - cache.added.remove(&status.path); } } @@ -144,28 +136,21 @@ impl StatusCache { let mut cache = self.cache.write().await; for status in statuses { - // Update memory cache + // Update memory cache based on event type match status.status { + FileStatusType::Created => { + cache.created.insert(status.path.clone(), status.clone()); + cache.modified.remove(&status.path); + cache.removed.remove(&status.path); + } FileStatusType::Modified => { cache.modified.insert(status.path.clone(), status.clone()); - cache.added.remove(&status.path); - cache.untracked.remove(&status.path); - } - FileStatusType::Added => { - cache.added.insert(status.path.clone(), status.clone()); - cache.modified.remove(&status.path); - cache.untracked.remove(&status.path); + cache.removed.remove(&status.path); } FileStatusType::Removed => { cache.removed.insert(status.path.clone(), status.clone()); + cache.created.remove(&status.path); cache.modified.remove(&status.path); - cache.added.remove(&status.path); - cache.untracked.remove(&status.path); - } - FileStatusType::Untracked => { - cache.untracked.insert(status.path.clone(), status.clone()); - cache.modified.remove(&status.path); - cache.added.remove(&status.path); } } } @@ -186,10 +171,9 @@ impl StatusCache { #[allow(dead_code)] // Used in tests pub async fn clear(&self) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; + cache.created.clear(); cache.modified.clear(); - cache.added.clear(); cache.removed.clear(); - cache.untracked.clear(); cache.scan_complete = false; cache.last_update = SystemTime::now(); Ok(()) diff --git a/oxen-rust/src/watcher/src/cache_test.rs b/oxen-rust/src/watcher/src/cache_test.rs index 0efec1f22..976ff7778 100644 --- a/oxen-rust/src/watcher/src/cache_test.rs +++ b/oxen-rust/src/watcher/src/cache_test.rs @@ -31,10 +31,9 @@ mod tests { let (cache, _temp_dir) = setup_test_cache().await; let status = cache.get_status(None).await; + assert!(status.created.is_empty()); assert!(status.modified.is_empty()); - assert!(status.added.is_empty()); assert!(status.removed.is_empty()); - assert!(status.untracked.is_empty()); assert!(!status.scan_complete); } @@ -55,9 +54,8 @@ mod tests { let status = cache.get_status(None).await; assert_eq!(status.modified.len(), 1); assert_eq!(status.modified[0].path, PathBuf::from("test.txt")); - assert!(status.added.is_empty()); + assert!(status.created.is_empty()); assert!(status.removed.is_empty()); - assert!(status.untracked.is_empty()); } #[tokio::test] @@ -70,7 +68,7 @@ mod tests { mtime: SystemTime::now(), size: 100, hash: None, - status: FileStatusType::Added, + status: FileStatusType::Created, }, FileStatus { path: PathBuf::from("file2.txt"), @@ -91,10 +89,9 @@ mod tests { cache.batch_update(statuses).await.unwrap(); let status = cache.get_status(None).await; - assert_eq!(status.added.len(), 1); + assert_eq!(status.created.len(), 1); assert_eq!(status.modified.len(), 1); assert_eq!(status.removed.len(), 1); - assert!(status.untracked.is_empty()); } #[tokio::test] @@ -103,43 +100,44 @@ mod tests { let path = PathBuf::from("test.txt"); - // Start as untracked + // Start as created cache.update_file_status(FileStatus { path: path.clone(), mtime: SystemTime::now(), size: 100, hash: None, - status: FileStatusType::Untracked, + status: FileStatusType::Created, }).await.unwrap(); let status = cache.get_status(None).await; - assert_eq!(status.untracked.len(), 1); + assert_eq!(status.created.len(), 1); - // Transition to added + // Transition to modified cache.update_file_status(FileStatus { path: path.clone(), mtime: SystemTime::now(), - size: 100, - hash: Some("hash".to_string()), - status: FileStatusType::Added, + size: 150, + hash: Some("newhash".to_string()), + status: FileStatusType::Modified, }).await.unwrap(); let status = cache.get_status(None).await; - assert_eq!(status.added.len(), 1); - assert_eq!(status.untracked.len(), 0); + assert_eq!(status.modified.len(), 1); + assert_eq!(status.created.len(), 1); // Created status is preserved - // Transition to modified + // Transition to removed cache.update_file_status(FileStatus { path: path.clone(), mtime: SystemTime::now(), - size: 150, - hash: Some("newhash".to_string()), - status: FileStatusType::Modified, + size: 0, + hash: None, + status: FileStatusType::Removed, }).await.unwrap(); let status = cache.get_status(None).await; - assert_eq!(status.modified.len(), 1); - assert_eq!(status.added.len(), 0); + assert_eq!(status.removed.len(), 1); + assert_eq!(status.created.len(), 0); // Removed clears created + assert_eq!(status.modified.len(), 0); // Removed clears modified } #[tokio::test] @@ -199,7 +197,7 @@ mod tests { mtime: SystemTime::now(), size: 100, hash: None, - status: FileStatusType::Added, + status: FileStatusType::Created, }, FileStatus { path: PathBuf::from("file2.txt"), @@ -215,7 +213,7 @@ mod tests { // Verify data exists let status = cache.get_status(None).await; - assert_eq!(status.added.len(), 1); + assert_eq!(status.created.len(), 1); assert_eq!(status.modified.len(), 1); assert!(status.scan_complete); @@ -224,7 +222,7 @@ mod tests { // Verify cache is empty let status = cache.get_status(None).await; - assert!(status.added.is_empty()); + assert!(status.created.is_empty()); assert!(status.modified.is_empty()); assert!(!status.scan_complete); } diff --git a/oxen-rust/src/watcher/src/cli.rs b/oxen-rust/src/watcher/src/cli.rs index 98b7c4881..8bee18ada 100644 --- a/oxen-rust/src/watcher/src/cli.rs +++ b/oxen-rust/src/watcher/src/cli.rs @@ -30,4 +30,4 @@ pub enum Commands { #[arg(short, long)] repo: PathBuf, }, -} \ No newline at end of file +} diff --git a/oxen-rust/src/watcher/src/event_processor.rs b/oxen-rust/src/watcher/src/event_processor.rs index be8639af9..da08de37e 100644 --- a/oxen-rust/src/watcher/src/event_processor.rs +++ b/oxen-rust/src/watcher/src/event_processor.rs @@ -22,28 +22,28 @@ impl EventProcessor { pub fn new(cache: Arc, repo_path: PathBuf) -> Self { Self { cache, repo_path } } - + /// Run the event processing loop pub async fn run(self, mut event_rx: mpsc::Receiver) { // Buffer for coalescing events let mut event_buffer: HashMap = HashMap::new(); let coalesce_window = Duration::from_millis(100); let batch_size = 1000; - + let mut interval = time::interval(coalesce_window); - + loop { tokio::select! { // Process incoming events Some(event) = event_rx.recv() => { self.handle_event(event, &mut event_buffer); - + // Flush if buffer is getting large if event_buffer.len() >= batch_size { self.flush_events(&mut event_buffer).await; } } - + // Periodic flush of coalesced events _ = interval.tick() => { if !event_buffer.is_empty() { @@ -53,55 +53,57 @@ impl EventProcessor { } } } - + /// Handle a single filesystem event fn handle_event(&self, event: Event, buffer: &mut HashMap) { trace!("Received event: {:?}", event); - + for path in event.paths { // Skip .oxen directory if path.components().any(|c| c.as_os_str() == ".oxen") { continue; } - + // Skip non-file events for now if path.is_dir() { continue; } - + // Coalesce events for the same path buffer.insert(path, (event.kind, Instant::now())); } } - + /// Flush buffered events to the cache async fn flush_events(&self, buffer: &mut HashMap) { if buffer.is_empty() { return; } - + debug!("Flushing {} events to cache", buffer.len()); - + let mut updates = Vec::new(); let now = Instant::now(); let stale_threshold = Duration::from_millis(200); - + // Process each buffered event for (path, (kind, timestamp)) in buffer.drain() { + trace!("Processing event for path '{}': {:?}", path.display(), kind); + // Skip stale events if now.duration_since(timestamp) > stale_threshold { + debug!("Skipping stale event for path: {}", path.display()); continue; } - - // Determine the status type based on event kind and file existence + + // Determine the status type based on event kind let status_type = match kind { EventKind::Create(_) => { // New file created - FileStatusType::Untracked + FileStatusType::Created } EventKind::Modify(_) => { - // File modified - need to check if it's tracked - // For now, assume modified if it exists + // File modified FileStatusType::Modified } EventKind::Remove(_) => { @@ -113,7 +115,7 @@ impl EventProcessor { continue; } }; - + // Get file metadata if it exists let (mtime, size) = if let Ok(metadata) = std::fs::metadata(&path) { ( @@ -127,7 +129,7 @@ impl EventProcessor { // Skip if we can't get metadata for non-removed files continue; }; - + // Convert absolute path to relative path let relative_path = match path.strip_prefix(&self.repo_path) { Ok(rel) => rel.to_path_buf(), @@ -136,7 +138,7 @@ impl EventProcessor { continue; } }; - + updates.push(FileStatus { path: relative_path, mtime, @@ -145,7 +147,7 @@ impl EventProcessor { status: status_type, }); } - + // Batch update the cache if !updates.is_empty() { if let Err(e) = self.cache.batch_update(updates).await { @@ -153,4 +155,4 @@ impl EventProcessor { } } } -} \ No newline at end of file +} diff --git a/oxen-rust/src/watcher/src/event_processor_test.rs b/oxen-rust/src/watcher/src/event_processor_test.rs index 31b300a21..a8719af1f 100644 --- a/oxen-rust/src/watcher/src/event_processor_test.rs +++ b/oxen-rust/src/watcher/src/event_processor_test.rs @@ -78,9 +78,9 @@ mod tests { // Should have no entries let status = cache.get_status(None).await; - assert!(status.added.is_empty()); - assert!(status.untracked.is_empty()); + assert!(status.created.is_empty()); assert!(status.modified.is_empty()); + assert!(status.removed.is_empty()); } #[tokio::test] @@ -103,8 +103,8 @@ mod tests { // Should have no entries (directories are skipped) let status = cache.get_status(None).await; - assert!(status.added.is_empty()); - assert!(status.untracked.is_empty()); + assert!(status.created.is_empty()); + assert!(status.modified.is_empty()); } #[tokio::test] @@ -131,7 +131,7 @@ mod tests { // Should have all files let status = cache.get_status(None).await; - let total = status.added.len() + status.untracked.len() + status.modified.len(); + let total = status.created.len() + status.modified.len() + status.removed.len(); assert!(total > 0, "Should have processed some files"); assert!(total <= 10, "Should not exceed number of files sent"); } @@ -184,8 +184,7 @@ mod tests { let status = cache.get_status(None).await; // Should have entries in different categories - let total = status.added.len() - + status.untracked.len() + let total = status.created.len() + status.modified.len() + status.removed.len(); assert!(total > 0, "Should have processed events"); @@ -223,8 +222,8 @@ mod tests { // Should have no entries let status = cache.get_status(None).await; - assert!(status.added.is_empty()); - assert!(status.untracked.is_empty()); + assert!(status.created.is_empty()); assert!(status.modified.is_empty()); + assert!(status.removed.is_empty()); } } diff --git a/oxen-rust/src/watcher/src/ipc.rs b/oxen-rust/src/watcher/src/ipc.rs index 003514d7d..1a07e3bb9 100644 --- a/oxen-rust/src/watcher/src/ipc.rs +++ b/oxen-rust/src/watcher/src/ipc.rs @@ -108,10 +108,9 @@ async fn handle_client( WatcherRequest::GetSummary => { let status = cache.get_status(None).await; WatcherResponse::Summary { + created: status.created.len(), modified: status.modified.len(), - added: status.added.len(), removed: status.removed.len(), - untracked: status.untracked.len(), last_updated: std::time::SystemTime::now(), } } diff --git a/oxen-rust/src/watcher/src/lib.rs b/oxen-rust/src/watcher/src/lib.rs index f2e8edb22..d2175ae06 100644 --- a/oxen-rust/src/watcher/src/lib.rs +++ b/oxen-rust/src/watcher/src/lib.rs @@ -7,4 +7,4 @@ pub mod monitor; pub mod protocol; pub use error::WatcherError; -pub use protocol::{WatcherRequest, WatcherResponse}; \ No newline at end of file +pub use protocol::{FileStatus, FileStatusType, StatusResult, WatcherRequest, WatcherResponse}; \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/main.rs b/oxen-rust/src/watcher/src/main.rs index c90761c8b..0db0be424 100644 --- a/oxen-rust/src/watcher/src/main.rs +++ b/oxen-rust/src/watcher/src/main.rs @@ -16,9 +16,9 @@ use crate::error::WatcherError; #[tokio::main] async fn main() -> Result<(), WatcherError> { env_logger::init(); - + let args = Args::parse(); - + match args.command { cli::Commands::Start { repo } => { info!("Starting watcher for repository: {}", repo.display()); @@ -41,7 +41,7 @@ async fn start_watcher(repo_path: PathBuf) -> Result<(), WatcherError> { info!("Watcher is already running for this repository"); return Ok(()); } - + // Initialize and run the watcher let watcher = monitor::FileSystemWatcher::new(repo_path)?; watcher.run().await @@ -49,7 +49,7 @@ async fn start_watcher(repo_path: PathBuf) -> Result<(), WatcherError> { async fn stop_watcher(repo_path: PathBuf) -> Result<(), WatcherError> { let socket_path = repo_path.join(".oxen/watcher.sock"); - + // Send shutdown request match ipc::send_request(&socket_path, protocol::WatcherRequest::Shutdown).await { Ok(_) => { @@ -79,10 +79,10 @@ async fn check_status(repo_path: PathBuf) -> Result<(), WatcherError> { async fn is_watcher_running(repo_path: &Path) -> Result { let socket_path = repo_path.join(".oxen/watcher.sock"); - + // Try to ping the watcher match ipc::send_request(&socket_path, protocol::WatcherRequest::Ping).await { Ok(protocol::WatcherResponse::Ok) => Ok(true), _ => Ok(false), } -} \ No newline at end of file +} diff --git a/oxen-rust/src/watcher/src/monitor.rs b/oxen-rust/src/watcher/src/monitor.rs index 3f2d9fbaf..94e7074db 100644 --- a/oxen-rust/src/watcher/src/monitor.rs +++ b/oxen-rust/src/watcher/src/monitor.rs @@ -118,77 +118,61 @@ async fn initial_scan(repo_path: PathBuf, cache: Arc) -> Result<(), // Load the repository let repo = LocalRepository::from_dir(&repo_path)?; - // Use Oxen's existing status implementation for initial state + // Use liboxen status implementation to establish a baseline. + // Then the watcher tracks changes relative to this baseline. match liboxen::repositories::status::status(&repo) { Ok(status) => { let mut file_statuses = Vec::new(); - // Convert Oxen status to our format - for path in status.modified_files { - if let Ok(metadata) = std::fs::metadata(repo_path.join(path.clone())) { + // Convert untracked files to "Created" events + for path in &status.untracked_files { + if let Ok(metadata) = std::fs::metadata(repo_path.join(path)) { file_statuses.push(crate::protocol::FileStatus { path: path.clone(), mtime: metadata.modified().unwrap_or(std::time::SystemTime::now()), size: metadata.len(), hash: None, - status: crate::protocol::FileStatusType::Modified, + status: crate::protocol::FileStatusType::Created, }); } } - for (path, entry) in status.staged_files { - let file_status_type = match entry.status { - liboxen::model::StagedEntryStatus::Added => { - crate::protocol::FileStatusType::Added - } - liboxen::model::StagedEntryStatus::Modified => { - crate::protocol::FileStatusType::Modified - } - liboxen::model::StagedEntryStatus::Removed => { - crate::protocol::FileStatusType::Removed - } - liboxen::model::StagedEntryStatus::Unmodified => { - continue; // Skip unmodified files - } - }; - - let (mtime, size) = - if let Ok(metadata) = std::fs::metadata(repo_path.join(path.clone())) { - ( - metadata.modified().unwrap_or(std::time::SystemTime::now()), - metadata.len(), - ) - } else { - // File might not exist if it was removed - (std::time::SystemTime::now(), 0) - }; - - file_statuses.push(crate::protocol::FileStatus { - path: path.clone(), - mtime, - size, - hash: Some(entry.hash), - status: file_status_type, - }); - } - - for path in status.untracked_files { - if let Ok(metadata) = std::fs::metadata(repo_path.join(path.clone())) { + // Convert modified files to "Modified" events + for path in &status.modified_files { + if let Ok(metadata) = std::fs::metadata(repo_path.join(path)) { file_statuses.push(crate::protocol::FileStatus { path: path.clone(), mtime: metadata.modified().unwrap_or(std::time::SystemTime::now()), size: metadata.len(), hash: None, - status: crate::protocol::FileStatusType::Untracked, + status: crate::protocol::FileStatusType::Modified, }); } } - // Batch update the cache - cache.batch_update(file_statuses).await?; - cache.mark_scan_complete().await?; + // Convert removed files to "Removed" events + for path in &status.removed_files { + // For removed files, we can't get metadata since they don't exist + file_statuses.push(crate::protocol::FileStatus { + path: path.clone(), + mtime: std::time::SystemTime::now(), + size: 0, + hash: None, + status: crate::protocol::FileStatusType::Removed, + }); + } + + // Batch update the cache with initial state + let total_files = status.untracked_files.len() + + status.modified_files.len() + + status.removed_files.len(); + if !file_statuses.is_empty() { + cache.batch_update(file_statuses).await?; + info!("Populated cache with {} initial file states", total_files); + } - info!("Initial scan complete"); + cache.mark_scan_complete().await?; + info!("Initial scan complete - established baseline, now tracking filesystem changes"); } Err(e) => { error!("Failed to get initial status: {}", e); diff --git a/oxen-rust/src/watcher/src/protocol.rs b/oxen-rust/src/watcher/src/protocol.rs index 3501b3be1..c7ce41548 100644 --- a/oxen-rust/src/watcher/src/protocol.rs +++ b/oxen-rust/src/watcher/src/protocol.rs @@ -1,145 +1,7 @@ -use serde::{Deserialize, Serialize}; -use std::fmt::{Debug, Display}; -use std::path::PathBuf; -use std::time::SystemTime; +// Re-export protocol types from liboxen in case we move them in the future +pub use liboxen::core::v_latest::watcher_client::{ + FileStatus, FileStatusType, StatusResult, WatcherRequest, WatcherResponse, +}; #[path = "protocol_test.rs"] mod protocol_test; - -/// Request messages sent from CLI to Watcher -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum WatcherRequest { - /// Get the current status of the repository - GetStatus { - /// Optional paths to filter status for - paths: Option>, - }, - /// Get a summary of changes (just counts) - GetSummary, - /// Force a refresh/rescan of specific paths - Refresh { paths: Vec }, - /// Shutdown the watcher daemon - Shutdown, - /// Health check ping - Ping, -} - -/// Response messages sent from Watcher to CLI -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum WatcherResponse { - /// Full status result - Status(StatusResult), - /// Summary of changes - Summary { - modified: usize, - added: usize, - removed: usize, - untracked: usize, - last_updated: SystemTime, - }, - /// Simple acknowledgment - Ok, - /// Error response - Error(String), -} - -/// Detailed status result -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StatusResult { - pub modified: Vec, - pub added: Vec, - pub removed: Vec, - pub untracked: Vec, - /// False if still doing initial scan - pub scan_complete: bool, -} - -impl Display for StatusResult { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.modified.iter().for_each(|status| { - writeln!(f, "{}", status).unwrap(); - }); - self.removed.iter().for_each(|path| { - writeln!(f, "[Removed]\t{:?}", path).unwrap(); - }); - self.untracked.iter().for_each(|path| { - writeln!(f, "[Untracked]\t{:?}", path).unwrap(); - }); - Ok(()) - } -} - -/// Status of a single file -#[derive(Clone, Serialize, Deserialize)] -pub struct FileStatus { - pub path: PathBuf, - pub mtime: SystemTime, - pub size: u64, - pub hash: Option, - pub status: FileStatusType, -} - -impl Debug for FileStatus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("FileStatus") - .field("path", &self.path) - .field( - "mtime", - &self - .mtime - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_millis(), - ) - .field("status", &self.status) - .finish() - } -} - -impl Display for FileStatus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[{:?}] {} {}", - self.status, - self.mtime - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_millis(), - self.path.display(), - ) - } -} - -/// Type of file status -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum FileStatusType { - Modified, - Added, - Removed, - Untracked, -} - -impl WatcherRequest { - /// Serialize request to MessagePack bytes - pub fn to_bytes(&self) -> Result, rmp_serde::encode::Error> { - rmp_serde::to_vec(self) - } - - /// Deserialize request from MessagePack bytes - pub fn from_bytes(bytes: &[u8]) -> Result { - rmp_serde::from_slice(bytes) - } -} - -impl WatcherResponse { - /// Serialize response to MessagePack bytes - pub fn to_bytes(&self) -> Result, rmp_serde::encode::Error> { - rmp_serde::to_vec(self) - } - - /// Deserialize response from MessagePack bytes - pub fn from_bytes(bytes: &[u8]) -> Result { - rmp_serde::from_slice(bytes) - } -} diff --git a/oxen-rust/src/watcher/src/protocol_test.rs b/oxen-rust/src/watcher/src/protocol_test.rs index 4abcec8be..1a4ac59b4 100644 --- a/oxen-rust/src/watcher/src/protocol_test.rs +++ b/oxen-rust/src/watcher/src/protocol_test.rs @@ -25,10 +25,9 @@ mod tests { #[test] fn test_response_serialization() { let response = WatcherResponse::Summary { + created: 3, modified: 5, - added: 3, removed: 2, - untracked: 10, last_updated: SystemTime::now(), }; @@ -36,11 +35,10 @@ mod tests { let deserialized = WatcherResponse::from_bytes(&bytes).unwrap(); match deserialized { - WatcherResponse::Summary { modified, added, removed, untracked, .. } => { + WatcherResponse::Summary { created, modified, removed, .. } => { + assert_eq!(created, 3); assert_eq!(modified, 5); - assert_eq!(added, 3); assert_eq!(removed, 2); - assert_eq!(untracked, 10); } _ => panic!("Wrong response type"), } @@ -49,6 +47,13 @@ mod tests { #[test] fn test_status_result_serialization() { let status_result = StatusResult { + created: vec![FileStatus { + path: PathBuf::from("created.txt"), + mtime: SystemTime::now(), + size: 200, + hash: None, + status: FileStatusType::Created, + }], modified: vec![FileStatus { path: PathBuf::from("modified.txt"), mtime: SystemTime::now(), @@ -56,15 +61,7 @@ mod tests { hash: Some("hash1".to_string()), status: FileStatusType::Modified, }], - added: vec![FileStatus { - path: PathBuf::from("added.txt"), - mtime: SystemTime::now(), - size: 200, - hash: None, - status: FileStatusType::Added, - }], removed: vec![PathBuf::from("removed.txt")], - untracked: vec![PathBuf::from("untracked.txt")], scan_complete: true, }; @@ -74,16 +71,14 @@ mod tests { match deserialized { WatcherResponse::Status(result) => { + assert_eq!(result.created.len(), 1); assert_eq!(result.modified.len(), 1); - assert_eq!(result.added.len(), 1); assert_eq!(result.removed.len(), 1); - assert_eq!(result.untracked.len(), 1); assert!(result.scan_complete); + assert_eq!(result.created[0].path, PathBuf::from("created.txt")); assert_eq!(result.modified[0].path, PathBuf::from("modified.txt")); - assert_eq!(result.added[0].path, PathBuf::from("added.txt")); assert_eq!(result.removed[0], PathBuf::from("removed.txt")); - assert_eq!(result.untracked[0], PathBuf::from("untracked.txt")); } _ => panic!("Wrong response type"), } @@ -117,13 +112,13 @@ mod tests { #[test] fn test_file_status_type_equality() { + assert_eq!(FileStatusType::Created, FileStatusType::Created); assert_eq!(FileStatusType::Modified, FileStatusType::Modified); - assert_eq!(FileStatusType::Added, FileStatusType::Added); assert_eq!(FileStatusType::Removed, FileStatusType::Removed); - assert_eq!(FileStatusType::Untracked, FileStatusType::Untracked); - assert_ne!(FileStatusType::Modified, FileStatusType::Added); - assert_ne!(FileStatusType::Added, FileStatusType::Removed); + assert_ne!(FileStatusType::Created, FileStatusType::Modified); + assert_ne!(FileStatusType::Modified, FileStatusType::Removed); + assert_ne!(FileStatusType::Created, FileStatusType::Removed); } #[test] @@ -155,10 +150,9 @@ mod tests { } let status_result = StatusResult { + created: vec![], modified, - added: vec![], removed: vec![], - untracked: vec![], scan_complete: true, }; diff --git a/oxen-rust/src/watcher/tests/integration_test.rs b/oxen-rust/src/watcher/tests/integration_test.rs index eba4a9e87..0ca589a33 100644 --- a/oxen-rust/src/watcher/tests/integration_test.rs +++ b/oxen-rust/src/watcher/tests/integration_test.rs @@ -238,14 +238,14 @@ async fn test_watcher_reports_relative_paths() { // Verify the response contains relative paths if let WatcherResponse::Status(status) = response { - // Check that all paths are relative - for path in &status.untracked { - assert!(!path.is_absolute(), "Path should be relative, got: {:?}", path); - assert!(!path.starts_with("/"), "Path should not start with /, got: {:?}", path); + // Check that all created file paths are relative + for file_status in &status.created { + assert!(!file_status.path.is_absolute(), "Path should be relative, got: {:?}", file_status.path); + assert!(!file_status.path.starts_with("/"), "Path should not start with /, got: {:?}", file_status.path); } // Verify specific files are present with correct relative paths - let paths: Vec<_> = status.untracked.iter().map(|p| p.to_string_lossy().to_string()).collect(); + let paths: Vec<_> = status.created.iter().map(|f| f.path.to_string_lossy().to_string()).collect(); assert!(paths.contains(&"root_file.txt".to_string()), "Should contain root_file.txt"); assert!(paths.contains(&"subdir/nested_file.txt".to_string()), "Should contain subdir/nested_file.txt"); } else { From 31cc97aeada590f7ccb3e7f9cd154e20b4bc54f2 Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Thu, 28 Aug 2025 18:29:40 -0600 Subject: [PATCH 08/11] Refactor watcher ping function for clarity --- .../lib/src/core/v_latest/watcher_client.rs | 98 ++++++++++--------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs b/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs index 24cda5e87..e06f27d2d 100644 --- a/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs +++ b/oxen-rust/src/lib/src/core/v_latest/watcher_client.rs @@ -120,51 +120,61 @@ impl WatcherClient { /// Check if the watcher is responsive pub async fn ping(&self) -> bool { - match UnixStream::connect(&self.socket_path).await { - Ok(mut stream) => { - // Send ping request - let request = WatcherRequest::Ping; - if let Ok(request_bytes) = rmp_serde::to_vec(&request) { - let len = request_bytes.len() as u32; - if stream.write_all(&len.to_le_bytes()).await.is_ok() - && stream.write_all(&request_bytes).await.is_ok() - && stream.flush().await.is_ok() - { - // Try to read response - let mut len_buf = [0u8; 4]; - if stream.read_exact(&mut len_buf).await.is_ok() { - let response_len = u32::from_le_bytes(len_buf) as usize; - if response_len < 1000 { - // Ping response should be small - let mut response_buf = vec![0u8; response_len]; - if stream.read_exact(&mut response_buf).await.is_ok() { - // Gracefully shutdown the connection before checking response - let _ = stream.shutdown().await; - if let Ok(response) = - rmp_serde::from_slice::(&response_buf) - { - matches!(response, WatcherResponse::Ok) - } else { - false - } - } else { - false - } - } else { - false - } - } else { - false - } - } else { - false - } - } else { - false - } - } - Err(_) => false, + // Try to connect to the socket + let Ok(mut stream) = UnixStream::connect(&self.socket_path).await else { + return false; + }; + + // Serialize ping request + let request = WatcherRequest::Ping; + let Ok(request_bytes) = rmp_serde::to_vec(&request) else { + return false; + }; + + // Send request length + let len = request_bytes.len() as u32; + let Ok(_) = stream.write_all(&len.to_le_bytes()).await else { + return false; + }; + + // Send request + let Ok(_) = stream.write_all(&request_bytes).await else { + return false; + }; + + // Flush the stream + let Ok(_) = stream.flush().await else { + return false; + }; + + // Read response length + let mut len_buf = [0u8; 4]; + let Ok(_) = stream.read_exact(&mut len_buf).await else { + return false; + }; + + let response_len = u32::from_le_bytes(len_buf) as usize; + + // Sanity check: ping response should be small + if response_len >= 1000 { + return false; } + + // Read response + let mut response_buf = vec![0u8; response_len]; + let Ok(_) = stream.read_exact(&mut response_buf).await else { + return false; + }; + + // Gracefully shutdown the connection + let _ = stream.shutdown().await; + + // Check if we got an Ok response + let Ok(response) = rmp_serde::from_slice::(&response_buf) else { + return false; + }; + + matches!(response, WatcherResponse::Ok) } } From 5a3b75ebf681a394dc7da96acc53d67298c8e0d8 Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Fri, 29 Aug 2025 14:51:55 -0600 Subject: [PATCH 09/11] Add debouncer to watcher [ci-skip] --- oxen-rust/Cargo.lock | 24 +++ oxen-rust/Cargo.toml | 1 + oxen-rust/src/watcher/Cargo.toml | 1 + oxen-rust/src/watcher/src/cache.rs | 74 +++---- oxen-rust/src/watcher/src/event_processor.rs | 195 ++++++++---------- .../src/watcher/src/event_processor_test.rs | 159 ++++++++------ oxen-rust/src/watcher/src/monitor.rs | 60 ++++-- .../src/watcher/tests/integration_test.rs | 44 ++-- 8 files changed, 304 insertions(+), 254 deletions(-) diff --git a/oxen-rust/Cargo.lock b/oxen-rust/Cargo.lock index 7ce4cb8ad..76c6cec97 100644 --- a/oxen-rust/Cargo.lock +++ b/oxen-rust/Cargo.lock @@ -67,6 +67,7 @@ dependencies = [ "mockito", "mp4", "notify", + "notify-debouncer-full", "num_cpus", "os_path", "par-stream", @@ -2503,6 +2504,15 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "file-id" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fc6a637b6dc58414714eddd9170ff187ecb0933d4c7024d1abbd23a3cc26e9" +dependencies = [ + "windows-sys 0.60.2", +] + [[package]] name = "filetime" version = "0.2.25" @@ -4232,6 +4242,19 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "notify-debouncer-full" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "375bd3a138be7bfeff3480e4a623df4cbfb55b79df617c055cd810ba466fa078" +dependencies = [ + "file-id", + "log", + "notify", + "notify-types", + "walkdir", +] + [[package]] name = "notify-types" version = "2.0.0" @@ -4569,6 +4592,7 @@ dependencies = [ "liboxen", "log", "notify", + "notify-debouncer-full", "rmp-serde", "serde", "serde_json", diff --git a/oxen-rust/Cargo.toml b/oxen-rust/Cargo.toml index 0a2c1e9fe..4bf15520c 100644 --- a/oxen-rust/Cargo.toml +++ b/oxen-rust/Cargo.toml @@ -89,6 +89,7 @@ mp4 = "0.14.0" mime = "0.3.17" minus = { version = "5.4.0", features = ["static_output", "search"] } notify = "8.2" +notify-debouncer-full = "0.6" num_cpus = "1.16.0" parking_lot = "0.12.1" par-stream = { version = "0.10.2", features = ["runtime-tokio"] } diff --git a/oxen-rust/src/watcher/Cargo.toml b/oxen-rust/src/watcher/Cargo.toml index 212ff1c9f..8117f54e3 100644 --- a/oxen-rust/src/watcher/Cargo.toml +++ b/oxen-rust/src/watcher/Cargo.toml @@ -18,6 +18,7 @@ path = "src/lib.rs" [dependencies] liboxen = { path = "../lib" } notify = "8.2" +notify-debouncer-full = "0.6" tokio = { version = "1", features = ["full"] } rmp-serde = "1.3.0" serde = { version = "1.0", features = ["derive"] } diff --git a/oxen-rust/src/watcher/src/cache.rs b/oxen-rust/src/watcher/src/cache.rs index 0feaacfee..24c18e6f2 100644 --- a/oxen-rust/src/watcher/src/cache.rs +++ b/oxen-rust/src/watcher/src/cache.rs @@ -33,6 +33,32 @@ struct MemoryCache { last_update: SystemTime, } +impl MemoryCache { + /// Helper function to update cache for a single file status + fn update_single_status(&mut self, status: FileStatus) { + match status.status { + FileStatusType::Created => { + self.created.insert(status.path.clone(), status.clone()); + // If a file is created, it's no longer modified or removed + self.modified.remove(&status.path); + self.removed.remove(&status.path); + } + FileStatusType::Modified => { + self.modified.insert(status.path.clone(), status.clone()); + // A modified file might have been previously created, keep that status + // But it's definitely not removed + self.removed.remove(&status.path); + } + FileStatusType::Removed => { + self.removed.insert(status.path.clone(), status.clone()); + // If removed, clear from created and modified + self.created.remove(&status.path); + self.modified.remove(&status.path); + } + } + } +} + impl StatusCache { /// Create a new status cache for a repository pub fn new(repo_path: &Path) -> Result { @@ -103,60 +129,20 @@ impl StatusCache { #[allow(dead_code)] // Used in tests pub async fn update_file_status(&self, status: FileStatus) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; - - // Update memory cache based on event type - match status.status { - FileStatusType::Created => { - cache.created.insert(status.path.clone(), status.clone()); - // If a file is created, it's no longer modified or removed - cache.modified.remove(&status.path); - cache.removed.remove(&status.path); - } - FileStatusType::Modified => { - cache.modified.insert(status.path.clone(), status.clone()); - // A modified file might have been previously created, keep that status - // But it's definitely not removed - cache.removed.remove(&status.path); - } - FileStatusType::Removed => { - cache.removed.insert(status.path.clone(), status.clone()); - // If removed, clear from created and modified - cache.created.remove(&status.path); - cache.modified.remove(&status.path); - } - } - + cache.update_single_status(status); cache.last_update = SystemTime::now(); - Ok(()) } /// Batch update multiple file statuses pub async fn batch_update(&self, statuses: Vec) -> Result<(), WatcherError> { let mut cache = self.cache.write().await; - + for status in statuses { - // Update memory cache based on event type - match status.status { - FileStatusType::Created => { - cache.created.insert(status.path.clone(), status.clone()); - cache.modified.remove(&status.path); - cache.removed.remove(&status.path); - } - FileStatusType::Modified => { - cache.modified.insert(status.path.clone(), status.clone()); - cache.removed.remove(&status.path); - } - FileStatusType::Removed => { - cache.removed.insert(status.path.clone(), status.clone()); - cache.created.remove(&status.path); - cache.modified.remove(&status.path); - } - } + cache.update_single_status(status); } - + cache.last_update = SystemTime::now(); - Ok(()) } diff --git a/oxen-rust/src/watcher/src/event_processor.rs b/oxen-rust/src/watcher/src/event_processor.rs index da08de37e..82995d4d8 100644 --- a/oxen-rust/src/watcher/src/event_processor.rs +++ b/oxen-rust/src/watcher/src/event_processor.rs @@ -1,13 +1,12 @@ use crate::cache::StatusCache; use crate::protocol::{FileStatus, FileStatusType}; -use log::{debug, error, trace}; -use notify::{Event, EventKind}; -use std::collections::HashMap; +use liboxen::util; +use log::{debug, error, trace, warn}; +use notify::EventKind; +use notify_debouncer_full::{DebounceEventResult, DebouncedEvent}; use std::path::PathBuf; use std::sync::Arc; -use std::time::{Duration, Instant}; use tokio::sync::mpsc; -use tokio::time; #[path = "event_processor_test.rs"] mod event_processor_test; @@ -20,136 +19,110 @@ pub struct EventProcessor { impl EventProcessor { pub fn new(cache: Arc, repo_path: PathBuf) -> Self { + // Canonicalize the repo path once to handle symlinks properly + let repo_path = repo_path.canonicalize().unwrap_or(repo_path); Self { cache, repo_path } } /// Run the event processing loop - pub async fn run(self, mut event_rx: mpsc::Receiver) { - // Buffer for coalescing events - let mut event_buffer: HashMap = HashMap::new(); - let coalesce_window = Duration::from_millis(100); - let batch_size = 1000; - - let mut interval = time::interval(coalesce_window); - + pub async fn run(self, mut event_rx: mpsc::Receiver) { loop { - tokio::select! { - // Process incoming events - Some(event) = event_rx.recv() => { - self.handle_event(event, &mut event_buffer); - - // Flush if buffer is getting large - if event_buffer.len() >= batch_size { - self.flush_events(&mut event_buffer).await; - } + // Wait for debounced events + match event_rx.recv().await { + Some(Ok(events)) => { + // Process the batch of debounced events + self.handle_debounced_events(events).await; } - - // Periodic flush of coalesced events - _ = interval.tick() => { - if !event_buffer.is_empty() { - self.flush_events(&mut event_buffer).await; + Some(Err(errors)) => { + // Log errors from the debouncer + for error in errors { + error!("Debouncer error: {:?}", error); } } + None => { + // Channel closed, exit + debug!("Event channel closed, exiting processor"); + break; + } } } } - /// Handle a single filesystem event - fn handle_event(&self, event: Event, buffer: &mut HashMap) { - trace!("Received event: {:?}", event); - - for path in event.paths { - // Skip .oxen directory - if path.components().any(|c| c.as_os_str() == ".oxen") { - continue; - } - - // Skip non-file events for now - if path.is_dir() { - continue; - } - - // Coalesce events for the same path - buffer.insert(path, (event.kind, Instant::now())); - } - } - - /// Flush buffered events to the cache - async fn flush_events(&self, buffer: &mut HashMap) { - if buffer.is_empty() { - return; - } - - debug!("Flushing {} events to cache", buffer.len()); - + /// Handle a batch of debounced events + async fn handle_debounced_events(&self, events: Vec) { let mut updates = Vec::new(); - let now = Instant::now(); - let stale_threshold = Duration::from_millis(200); - // Process each buffered event - for (path, (kind, timestamp)) in buffer.drain() { - trace!("Processing event for path '{}': {:?}", path.display(), kind); + for debounced_event in events { + trace!("Processing debounced event: {:?}", debounced_event); - // Skip stale events - if now.duration_since(timestamp) > stale_threshold { - debug!("Skipping stale event for path: {}", path.display()); - continue; - } + let event = &debounced_event.event; - // Determine the status type based on event kind - let status_type = match kind { - EventKind::Create(_) => { - // New file created - FileStatusType::Created - } - EventKind::Modify(_) => { - // File modified - FileStatusType::Modified - } - EventKind::Remove(_) => { - // File removed - FileStatusType::Removed - } - EventKind::Any | EventKind::Access(_) | EventKind::Other => { - // Skip these events + // Process each path in the event + // Note: .oxen paths are already filtered in the monitor + for path in &event.paths { + // Skip directories for now + if path.is_dir() { continue; } - }; - // Get file metadata if it exists - let (mtime, size) = if let Ok(metadata) = std::fs::metadata(&path) { - ( - metadata.modified().unwrap_or(std::time::SystemTime::now()), - metadata.len(), - ) - } else if status_type == FileStatusType::Removed { - // File was removed, use current time and zero size - (std::time::SystemTime::now(), 0) - } else { - // Skip if we can't get metadata for non-removed files - continue; - }; - - // Convert absolute path to relative path - let relative_path = match path.strip_prefix(&self.repo_path) { - Ok(rel) => rel.to_path_buf(), - Err(_) => { - // Path is not within repo, skip it + // Determine the status type based on event kind + let status_type = match event.kind { + EventKind::Create(_) => FileStatusType::Created, + EventKind::Modify(_) => FileStatusType::Modified, + EventKind::Remove(_) => FileStatusType::Removed, + EventKind::Any | EventKind::Access(_) | EventKind::Other => { + // Skip these events + continue; + } + }; + + // Get file metadata if it exists + let (mtime, size) = if let Ok(metadata) = std::fs::metadata(path) { + ( + metadata.modified().unwrap_or(std::time::SystemTime::now()), + metadata.len(), + ) + } else if status_type == FileStatusType::Removed { + // File was removed, use current time and zero size + (std::time::SystemTime::now(), 0) + } else { + // Skip if we can't get metadata for non-removed files + warn!("Could not get metadata for file: {:?}", path); continue; - } - }; - - updates.push(FileStatus { - path: relative_path, - mtime, - size, - hash: None, // Will be computed later if needed - status: status_type, - }); + }; + + // Convert absolute path to relative path + let relative_path = match util::fs::path_relative_to_dir(path, &self.repo_path) { + Ok(rel) => rel, + Err(e) => { + trace!( + "Path not within repo, skipping: {:?} (repo: {:?}, error: {})", + path, + self.repo_path, + e + ); + continue; + } + }; + + debug!( + "Processing event for {:?}: {:?}", + relative_path, status_type + ); + + updates.push(FileStatus { + path: relative_path, + mtime, + size, + hash: None, // Will be computed later if needed + status: status_type, + }); + } } // Batch update the cache if !updates.is_empty() { + debug!("Updating cache with {} file status changes", updates.len()); if let Err(e) = self.cache.batch_update(updates).await { error!("Failed to update cache: {}", e); } diff --git a/oxen-rust/src/watcher/src/event_processor_test.rs b/oxen-rust/src/watcher/src/event_processor_test.rs index a8719af1f..876536f6b 100644 --- a/oxen-rust/src/watcher/src/event_processor_test.rs +++ b/oxen-rust/src/watcher/src/event_processor_test.rs @@ -2,14 +2,16 @@ mod tests { use crate::cache::StatusCache; use crate::event_processor::EventProcessor; - use notify::{Event, EventKind}; + use notify::EventKind; + use notify_debouncer_full::{DebounceEventResult, DebouncedEvent}; + use notify::Event; use std::sync::Arc; use std::time::Duration; use tempfile::TempDir; use tokio::sync::mpsc; use tokio::time; - async fn setup_test_processor() -> (Arc, mpsc::Sender, TempDir) { + async fn setup_test_processor() -> (Arc, mpsc::Sender, TempDir) { let temp_dir = TempDir::new().unwrap(); let repo_path = temp_dir.path(); @@ -20,7 +22,7 @@ mod tests { liboxen::repositories::init::init(repo_path).unwrap(); let cache = Arc::new(StatusCache::new(repo_path).unwrap()); - let (event_tx, event_rx) = mpsc::channel::(100); + let (event_tx, event_rx) = mpsc::channel::(100); let processor = EventProcessor::new(cache.clone(), repo_path.to_path_buf()); @@ -35,28 +37,35 @@ mod tests { (cache, event_tx, temp_dir) } + fn create_debounced_event(paths: Vec, kind: EventKind) -> DebouncedEvent { + let mut event = Event::new(kind); + event.paths = paths; + DebouncedEvent { + event, + time: std::time::Instant::now(), + } + } + #[tokio::test] - async fn test_event_coalescing() { + async fn test_debounced_events() { let (cache, event_tx, temp_dir) = setup_test_processor().await; let test_file = temp_dir.path().join("test.txt"); + std::fs::write(&test_file, "content").unwrap(); - // Send multiple events for the same file rapidly - for _ in 0..5 { - let event = Event { - kind: EventKind::Modify(notify::event::ModifyKind::Any), - paths: vec![test_file.clone()], - attrs: Default::default(), - }; - event_tx.send(event).await.unwrap(); - } + // Send a debounced create event + let event = create_debounced_event( + vec![test_file.clone()], + EventKind::Create(notify::event::CreateKind::File), + ); + event_tx.send(Ok(vec![event])).await.unwrap(); - // Wait for coalescing window + // Wait for processing time::sleep(Duration::from_millis(150)).await; - // Should only have one entry in cache + // Check cache was updated let status = cache.get_status(None).await; - assert!(status.modified.len() <= 1, "Events should be coalesced"); + assert_eq!(status.created.len(), 1); } #[tokio::test] @@ -65,13 +74,12 @@ mod tests { let oxen_file = temp_dir.path().join(".oxen").join("some_file.db"); - let event = Event { - kind: EventKind::Create(notify::event::CreateKind::Any), - paths: vec![oxen_file], - attrs: Default::default(), - }; + let event = create_debounced_event( + vec![oxen_file], + EventKind::Create(notify::event::CreateKind::File), + ); - event_tx.send(event).await.unwrap(); + event_tx.send(Ok(vec![event])).await.unwrap(); // Wait for processing time::sleep(Duration::from_millis(150)).await; @@ -90,13 +98,12 @@ mod tests { let dir_path = temp_dir.path().join("some_directory"); std::fs::create_dir_all(&dir_path).unwrap(); - let event = Event { - kind: EventKind::Create(notify::event::CreateKind::Any), - paths: vec![dir_path], - attrs: Default::default(), - }; + let event = create_debounced_event( + vec![dir_path], + EventKind::Create(notify::event::CreateKind::Folder), + ); - event_tx.send(event).await.unwrap(); + event_tx.send(Ok(vec![event])).await.unwrap(); // Wait for processing time::sleep(Duration::from_millis(150)).await; @@ -111,29 +118,28 @@ mod tests { async fn test_batch_processing() { let (cache, event_tx, temp_dir) = setup_test_processor().await; - // Send events for multiple files - for i in 0..10 { + let mut events = Vec::new(); + + // Create multiple files and events + for i in 0..5 { let file_path = temp_dir.path().join(format!("file{}.txt", i)); - // Create the file so metadata can be read std::fs::write(&file_path, format!("content{}", i)).unwrap(); - - let event = Event { - kind: EventKind::Create(notify::event::CreateKind::Any), - paths: vec![file_path], - attrs: Default::default(), - }; - - event_tx.send(event).await.unwrap(); + + events.push(create_debounced_event( + vec![file_path], + EventKind::Create(notify::event::CreateKind::File), + )); } + // Send all events as a batch (this is what the debouncer does) + event_tx.send(Ok(events)).await.unwrap(); + // Wait for batch processing time::sleep(Duration::from_millis(200)).await; // Should have all files let status = cache.get_status(None).await; - let total = status.created.len() + status.modified.len() + status.removed.len(); - assert!(total > 0, "Should have processed some files"); - assert!(total <= 10, "Should not exceed number of files sent"); + assert_eq!(status.created.len(), 5); } #[tokio::test] @@ -145,11 +151,10 @@ mod tests { std::fs::write(&create_file, "content").unwrap(); event_tx - .send(Event { - kind: EventKind::Create(notify::event::CreateKind::Any), - paths: vec![create_file.clone()], - attrs: Default::default(), - }) + .send(Ok(vec![create_debounced_event( + vec![create_file.clone()], + EventKind::Create(notify::event::CreateKind::File), + )])) .await .unwrap(); @@ -158,11 +163,10 @@ mod tests { std::fs::write(&modify_file, "content").unwrap(); event_tx - .send(Event { - kind: EventKind::Modify(notify::event::ModifyKind::Any), - paths: vec![modify_file.clone()], - attrs: Default::default(), - }) + .send(Ok(vec![create_debounced_event( + vec![modify_file.clone()], + EventKind::Modify(notify::event::ModifyKind::Data(notify::event::DataChange::Any)), + )])) .await .unwrap(); @@ -170,11 +174,10 @@ mod tests { let remove_file = temp_dir.path().join("removed.txt"); event_tx - .send(Event { - kind: EventKind::Remove(notify::event::RemoveKind::Any), - paths: vec![remove_file.clone()], - attrs: Default::default(), - }) + .send(Ok(vec![create_debounced_event( + vec![remove_file.clone()], + EventKind::Remove(notify::event::RemoveKind::File), + )])) .await .unwrap(); @@ -199,21 +202,19 @@ mod tests { // Send Access event (should be ignored) event_tx - .send(Event { - kind: EventKind::Access(notify::event::AccessKind::Any), - paths: vec![file.clone()], - attrs: Default::default(), - }) + .send(Ok(vec![create_debounced_event( + vec![file.clone()], + EventKind::Access(notify::event::AccessKind::Read), + )])) .await .unwrap(); // Send Other event (should be ignored) event_tx - .send(Event { - kind: EventKind::Other, - paths: vec![file], - attrs: Default::default(), - }) + .send(Ok(vec![create_debounced_event( + vec![file], + EventKind::Other, + )])) .await .unwrap(); @@ -226,4 +227,26 @@ mod tests { assert!(status.modified.is_empty()); assert!(status.removed.is_empty()); } -} + + #[tokio::test] + async fn test_error_handling() { + let (cache, event_tx, _temp_dir) = setup_test_processor().await; + + // Send an error result (simulating debouncer errors) + let errors = vec![ + notify::Error::generic("Test error 1"), + notify::Error::generic("Test error 2"), + ]; + + event_tx.send(Err(errors)).await.unwrap(); + + // Wait for processing + time::sleep(Duration::from_millis(50)).await; + + // Should still be running and cache should be empty + let status = cache.get_status(None).await; + assert!(status.created.is_empty()); + assert!(status.modified.is_empty()); + assert!(status.removed.is_empty()); + } +} \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/monitor.rs b/oxen-rust/src/watcher/src/monitor.rs index 94e7074db..1990cf7c9 100644 --- a/oxen-rust/src/watcher/src/monitor.rs +++ b/oxen-rust/src/watcher/src/monitor.rs @@ -1,7 +1,9 @@ use log::{error, info, warn}; -use notify::{Event, RecursiveMode, Watcher}; +use notify::RecursiveMode; +use notify_debouncer_full::{new_debouncer, DebounceEventResult}; use std::path::PathBuf; use std::sync::Arc; +use std::time::Duration; use tokio::sync::mpsc; use liboxen::core; @@ -44,22 +46,46 @@ impl FileSystemWatcher { let pid_file = self.repo_path.join(".oxen/watcher.pid"); std::fs::write(&pid_file, std::process::id().to_string())?; - // Create channel for filesystem events - let (event_tx, event_rx) = mpsc::channel::(1000); + // Create channel for debounced events + let (event_tx, event_rx) = mpsc::channel::(1000); + + // Create the debounced watcher with a 100ms timeout + let mut debouncer = new_debouncer( + Duration::from_millis(100), + None, // No cache override + move |result: DebounceEventResult| { + // Filter out .oxen directory events before sending + let filtered_result = match result { + Ok(events) => { + let filtered: Vec<_> = events + .into_iter() + .filter(|event| { + // Skip events for paths containing .oxen + !event + .event + .paths + .iter() + .any(|p| p.components().any(|c| c.as_os_str() == ".oxen")) + }) + .collect(); + + if filtered.is_empty() { + return; // Don't send empty events + } + Ok(filtered) + } + Err(e) => Err(e), + }; + + // Try to send filtered event, block if channel is full + // TODO: How should we handle this? + let _ = event_tx.blocking_send(filtered_result); + }, + )?; + + // Watch the repository directory + debouncer.watch(&self.repo_path, RecursiveMode::Recursive)?; - // Create the notify watcher - let mut watcher = notify::recommended_watcher(move |res: Result| { - match res { - Ok(event) => { - // Try to send event, drop if channel is full - let _ = event_tx.blocking_send(event); - } - Err(e) => error!("Filesystem watch error: {}", e), - } - })?; - - // Watch the repository directory (excluding .oxen) - watcher.watch(&self.repo_path, RecursiveMode::Recursive)?; info!("Watching directory: {}", self.repo_path.display()); // Start the event processor @@ -98,7 +124,7 @@ impl FileSystemWatcher { // Cleanup info!("Shutting down filesystem watcher"); - drop(watcher); + drop(debouncer); // Remove PID file let _ = std::fs::remove_file(&pid_file); diff --git a/oxen-rust/src/watcher/tests/integration_test.rs b/oxen-rust/src/watcher/tests/integration_test.rs index 0ca589a33..936a2c35f 100644 --- a/oxen-rust/src/watcher/tests/integration_test.rs +++ b/oxen-rust/src/watcher/tests/integration_test.rs @@ -1,10 +1,10 @@ +use oxen_watcher::ipc::send_request; +use oxen_watcher::protocol::{WatcherRequest, WatcherResponse}; use std::path::PathBuf; use std::time::Duration; use tempfile::TempDir; use tokio::process::Command; use tokio::time; -use oxen_watcher::ipc::send_request; -use oxen_watcher::protocol::{WatcherRequest, WatcherResponse}; /// Helper to get the watcher binary path fn get_watcher_path() -> PathBuf { @@ -32,7 +32,6 @@ fn get_watcher_path() -> PathBuf { } #[tokio::test] -#[ignore] // Run with: cargo test --package oxen-watcher -- --ignored async fn test_watcher_lifecycle() { let temp_dir = TempDir::new().unwrap(); let repo_path = temp_dir.path(); @@ -99,7 +98,6 @@ async fn test_watcher_lifecycle() { } #[tokio::test] -#[ignore] async fn test_watcher_file_detection() { let temp_dir = TempDir::new().unwrap(); let repo_path = temp_dir.path(); @@ -155,7 +153,6 @@ async fn test_watcher_file_detection() { } #[tokio::test] -#[ignore] async fn test_multiple_watcher_prevention() { let temp_dir = TempDir::new().unwrap(); let repo_path = temp_dir.path(); @@ -202,7 +199,6 @@ async fn test_multiple_watcher_prevention() { } #[tokio::test] -#[ignore] async fn test_watcher_reports_relative_paths() { let temp_dir = TempDir::new().unwrap(); let repo_path = temp_dir.path(); @@ -221,7 +217,7 @@ async fn test_watcher_reports_relative_paths() { .expect("Failed to start watcher"); // Give it time to start and do initial scan - time::sleep(Duration::from_secs(3)).await; + time::sleep(Duration::from_secs(1)).await; // Create test files in different directories std::fs::write(repo_path.join("root_file.txt"), "root content").unwrap(); @@ -234,20 +230,40 @@ async fn test_watcher_reports_relative_paths() { // Query the watcher via IPC let socket_path = repo_path.join(".oxen/watcher.sock"); let request = WatcherRequest::GetStatus { paths: None }; - let response = send_request(&socket_path, request).await.expect("Failed to send request"); + let response = send_request(&socket_path, request) + .await + .expect("Failed to send request"); // Verify the response contains relative paths if let WatcherResponse::Status(status) = response { // Check that all created file paths are relative for file_status in &status.created { - assert!(!file_status.path.is_absolute(), "Path should be relative, got: {:?}", file_status.path); - assert!(!file_status.path.starts_with("/"), "Path should not start with /, got: {:?}", file_status.path); + assert!( + !file_status.path.is_absolute(), + "Path should be relative, got: {:?}", + file_status.path + ); + assert!( + !file_status.path.starts_with("/"), + "Path should not start with /, got: {:?}", + file_status.path + ); } - + // Verify specific files are present with correct relative paths - let paths: Vec<_> = status.created.iter().map(|f| f.path.to_string_lossy().to_string()).collect(); - assert!(paths.contains(&"root_file.txt".to_string()), "Should contain root_file.txt"); - assert!(paths.contains(&"subdir/nested_file.txt".to_string()), "Should contain subdir/nested_file.txt"); + let paths: Vec<_> = status + .created + .iter() + .map(|f| f.path.to_string_lossy().to_string()) + .collect(); + assert!( + paths.contains(&"root_file.txt".to_string()), + "Should contain root_file.txt" + ); + assert!( + paths.contains(&"subdir/nested_file.txt".to_string()), + "Should contain subdir/nested_file.txt" + ); } else { panic!("Expected Status response, got: {:?}", response); } From a75f3f108405070cc0d6e704a5c0a9a60049976e Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Wed, 10 Sep 2025 10:55:45 -0600 Subject: [PATCH 10/11] Correctly handle removed files --- oxen-rust/src/watcher/src/cache.rs | 14 +++-- oxen-rust/src/watcher/src/cache_test.rs | 44 ++++++++++++++- oxen-rust/src/watcher/src/event_processor.rs | 4 +- .../src/watcher/src/event_processor_test.rs | 53 +++++++++++++++++++ oxen-rust/src/watcher/src/monitor.rs | 3 ++ 5 files changed, 111 insertions(+), 7 deletions(-) diff --git a/oxen-rust/src/watcher/src/cache.rs b/oxen-rust/src/watcher/src/cache.rs index 24c18e6f2..efdea065e 100644 --- a/oxen-rust/src/watcher/src/cache.rs +++ b/oxen-rust/src/watcher/src/cache.rs @@ -50,10 +50,18 @@ impl MemoryCache { self.removed.remove(&status.path); } FileStatusType::Removed => { - self.removed.insert(status.path.clone(), status.clone()); - // If removed, clear from created and modified - self.created.remove(&status.path); + // Check if this file was created in the current session + let was_created_in_session = self.created.remove(&status.path).is_some(); self.modified.remove(&status.path); + + // Only add to removed list if the file existed before this session + // If file was created in this session and then deleted, + // net effect is nothing, so don't add to removed + if !was_created_in_session { + // File existed before watcher started (or was in initial scan), + // so track its removal + self.removed.insert(status.path.clone(), status.clone()); + } } } } diff --git a/oxen-rust/src/watcher/src/cache_test.rs b/oxen-rust/src/watcher/src/cache_test.rs index 976ff7778..769b5dc4a 100644 --- a/oxen-rust/src/watcher/src/cache_test.rs +++ b/oxen-rust/src/watcher/src/cache_test.rs @@ -135,11 +135,53 @@ mod tests { }).await.unwrap(); let status = cache.get_status(None).await; - assert_eq!(status.removed.len(), 1); + // File was created and removed in same session, so should not appear anywhere + assert_eq!(status.removed.len(), 0); // Not in removed (net effect is nothing) assert_eq!(status.created.len(), 0); // Removed clears created assert_eq!(status.modified.len(), 0); // Removed clears modified } + #[tokio::test] + async fn test_remove_existing_file() { + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path(); + + // Initialize a proper oxen repo + liboxen::repositories::init::init(repo_path).unwrap(); + + let cache = StatusCache::new(repo_path).unwrap(); + + let path = PathBuf::from("existing.txt"); + + // File starts as Modified (existed before watcher, was modified) + cache.update_file_status(FileStatus { + path: path.clone(), + mtime: SystemTime::now(), + size: 100, + hash: None, + status: FileStatusType::Modified, + }).await.unwrap(); + + let status = cache.get_status(None).await; + assert_eq!(status.modified.len(), 1); + assert_eq!(status.created.len(), 0); + + // Now remove it + cache.update_file_status(FileStatus { + path: path.clone(), + mtime: SystemTime::now(), + size: 0, + hash: None, + status: FileStatusType::Removed, + }).await.unwrap(); + + let status = cache.get_status(None).await; + // File existed before session and was removed, should show in removed list + assert_eq!(status.removed.len(), 1); + assert_eq!(status.created.len(), 0); + assert_eq!(status.modified.len(), 0); + } + #[tokio::test] async fn test_path_filtering() { let (cache, _temp_dir) = setup_test_cache().await; diff --git a/oxen-rust/src/watcher/src/event_processor.rs b/oxen-rust/src/watcher/src/event_processor.rs index 82995d4d8..c0af820c2 100644 --- a/oxen-rust/src/watcher/src/event_processor.rs +++ b/oxen-rust/src/watcher/src/event_processor.rs @@ -19,8 +19,6 @@ pub struct EventProcessor { impl EventProcessor { pub fn new(cache: Arc, repo_path: PathBuf) -> Self { - // Canonicalize the repo path once to handle symlinks properly - let repo_path = repo_path.canonicalize().unwrap_or(repo_path); Self { cache, repo_path } } @@ -91,7 +89,7 @@ impl EventProcessor { continue; }; - // Convert absolute path to relative path + // Convert absolute path to relative path using liboxen let relative_path = match util::fs::path_relative_to_dir(path, &self.repo_path) { Ok(rel) => rel, Err(e) => { diff --git a/oxen-rust/src/watcher/src/event_processor_test.rs b/oxen-rust/src/watcher/src/event_processor_test.rs index 876536f6b..f979e631b 100644 --- a/oxen-rust/src/watcher/src/event_processor_test.rs +++ b/oxen-rust/src/watcher/src/event_processor_test.rs @@ -249,4 +249,57 @@ mod tests { assert!(status.modified.is_empty()); assert!(status.removed.is_empty()); } + + #[tokio::test] + async fn test_file_create_then_delete() { + let (cache, event_tx, temp_dir) = setup_test_processor().await; + + let test_file = temp_dir.path().join("test_create_delete.txt"); + + // First create the file and send a create event + std::fs::write(&test_file, "content").unwrap(); + + let create_event = create_debounced_event( + vec![test_file.clone()], + EventKind::Create(notify::event::CreateKind::File), + ); + event_tx.send(Ok(vec![create_event])).await.unwrap(); + + // Wait for processing + time::sleep(Duration::from_millis(150)).await; + + // Verify file is in created list + let status = cache.get_status(None).await; + assert_eq!(status.created.len(), 1, "File should be in created list"); + assert!(status.modified.is_empty()); + assert!(status.removed.is_empty()); + + // Now delete the file and send a remove event + std::fs::remove_file(&test_file).unwrap(); + + let remove_event = create_debounced_event( + vec![test_file.clone()], + EventKind::Remove(notify::event::RemoveKind::File), + ); + event_tx.send(Ok(vec![remove_event])).await.unwrap(); + + // Wait for processing + time::sleep(Duration::from_millis(150)).await; + + // After deletion, file should be removed from created list + // and should either be in removed list or completely gone + let status = cache.get_status(None).await; + + assert!( + status.created.is_empty(), + "File should not be in created list after deletion" + ); + assert!(status.modified.is_empty()); + // The file was created and deleted within the watcher session, + // so it should not appear in any list (net effect is no change) + assert!( + status.removed.is_empty(), + "File created and deleted in same session should not appear in removed list" + ); + } } \ No newline at end of file diff --git a/oxen-rust/src/watcher/src/monitor.rs b/oxen-rust/src/watcher/src/monitor.rs index 1990cf7c9..5ba834960 100644 --- a/oxen-rust/src/watcher/src/monitor.rs +++ b/oxen-rust/src/watcher/src/monitor.rs @@ -30,6 +30,9 @@ impl FileSystemWatcher { )); } + // Canonicalize the repo path to handle symlinks + let repo_path = repo_path.canonicalize()?; + let cache = Arc::new(StatusCache::new(&repo_path)?); Ok(Self { repo_path, cache }) From cfd455358d5971a73e93a661b44240d90a1d790b Mon Sep 17 00:00:00 2001 From: Joshua Elliott Date: Wed, 10 Sep 2025 11:34:19 -0600 Subject: [PATCH 11/11] Filter out staged files from watcher "created" files --- oxen-rust/src/lib/src/core/v_latest/status.rs | 63 ++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/oxen-rust/src/lib/src/core/v_latest/status.rs b/oxen-rust/src/lib/src/core/v_latest/status.rs index 0806b55f0..ac3951ea1 100644 --- a/oxen-rust/src/lib/src/core/v_latest/status.rs +++ b/oxen-rust/src/lib/src/core/v_latest/status.rs @@ -182,6 +182,13 @@ fn merge_watcher_with_staged( // Process staged entries and build staged data status_from_dir_entries(&mut staged_data, dir_entries)?; + + // Filter out staged files from untracked files + // Files that have been staged should not appear as untracked + let staged_paths: HashSet<&PathBuf> = staged_data.staged_files.keys().collect(); + staged_data + .untracked_files + .retain(|path| !staged_paths.contains(&path)); } // Find merge conflicts @@ -1072,7 +1079,6 @@ fn count_removed_entries( Ok(()) } -// Helper functions (implement these based on your existing code) fn open_staged_db( repo: &LocalRepository, ) -> Result>, OxenError> { @@ -1552,4 +1558,59 @@ mod tests { Ok(()) }) } + + #[tokio::test] + async fn test_merge_watcher_filters_staged_from_untracked() -> Result<(), OxenError> { + test::run_empty_local_repo_test_async(|repo| async move { + // First, create some files and stage one of them + let file1_path = repo.path.join("file1.txt"); + let file2_path = repo.path.join("file2.txt"); + let file3_path = repo.path.join("file3.txt"); + + test::write_txt_file_to_path(&file1_path, "content1")?; + test::write_txt_file_to_path(&file2_path, "content2")?; + test::write_txt_file_to_path(&file3_path, "content3")?; + + // Stage file1.txt + repositories::add(&repo, &file1_path).await?; + + // Create watcher status that reports all three files as created/untracked + let mut untracked = HashSet::new(); + untracked.insert(PathBuf::from("file1.txt")); // This one is staged + untracked.insert(PathBuf::from("file2.txt")); // This one is not staged + untracked.insert(PathBuf::from("file3.txt")); // This one is not staged + + let watcher_status = WatcherStatus { + created: untracked, + modified: HashSet::new(), + removed: HashSet::new(), + scan_complete: true, + last_updated: SystemTime::now(), + }; + + let opts = StagedDataOpts::default(); + + // Run merge function + let result = merge_watcher_with_staged(&repo, &opts, watcher_status)?; + + // Verify file1.txt is in staged_files but NOT in untracked_files + assert!(result + .staged_files + .contains_key(&PathBuf::from("file1.txt"))); + assert!(!result.untracked_files.contains(&PathBuf::from("file1.txt"))); + + // Verify file2.txt and file3.txt are still in untracked_files + assert!(result.untracked_files.contains(&PathBuf::from("file2.txt"))); + assert!(result.untracked_files.contains(&PathBuf::from("file3.txt"))); + + // Verify we have exactly 2 untracked files (file2 and file3) + assert_eq!(result.untracked_files.len(), 2); + + // Verify we have exactly 1 staged file (file1) + assert_eq!(result.staged_files.len(), 1); + + Ok(()) + }) + .await + } }