diff --git a/crates/bashkit/src/fs/memory.rs b/crates/bashkit/src/fs/memory.rs index 2cab5f1b..09135774 100644 --- a/crates/bashkit/src/fs/memory.rs +++ b/crates/bashkit/src/fs/memory.rs @@ -42,7 +42,7 @@ use async_trait::async_trait; use std::collections::HashMap; use std::io::{Error as IoError, ErrorKind}; use std::path::{Path, PathBuf}; -use std::sync::RwLock; +use std::sync::{Arc, RwLock}; use std::time::SystemTime; use super::limits::{FsLimits, FsUsage}; @@ -170,12 +170,26 @@ pub struct InMemoryFs { limits: FsLimits, } -#[derive(Debug, Clone)] +/// Lazy file content loader type. +/// +/// Called at most once when the file is first read. The loader is never called +/// if the file is overwritten before being read. +pub type LazyLoader = Arc Vec + Send + Sync>; + enum FsEntry { File { content: Vec, metadata: Metadata, }, + /// A file whose content is loaded on first read. + /// + /// `stat()` returns metadata without triggering the load. + /// On first `read_file()`, the loader is called and the entry is replaced + /// with a regular `File`. If written before read, the loader is never called. + LazyFile { + loader: LazyLoader, + metadata: Metadata, + }, Directory { metadata: Metadata, }, @@ -189,6 +203,62 @@ enum FsEntry { }, } +impl Clone for FsEntry { + fn clone(&self) -> Self { + match self { + Self::File { content, metadata } => Self::File { + content: content.clone(), + metadata: metadata.clone(), + }, + Self::LazyFile { loader, metadata } => Self::LazyFile { + loader: Arc::clone(loader), + metadata: metadata.clone(), + }, + Self::Directory { metadata } => Self::Directory { + metadata: metadata.clone(), + }, + Self::Symlink { target, metadata } => Self::Symlink { + target: target.clone(), + metadata: metadata.clone(), + }, + Self::Fifo { content, metadata } => Self::Fifo { + content: content.clone(), + metadata: metadata.clone(), + }, + } + } +} + +impl std::fmt::Debug for FsEntry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::File { content, metadata } => f + .debug_struct("File") + .field("content_len", &content.len()) + .field("metadata", metadata) + .finish(), + Self::LazyFile { metadata, .. } => f + .debug_struct("LazyFile") + .field("metadata", metadata) + .finish(), + Self::Directory { metadata } => f + .debug_struct("Directory") + .field("metadata", metadata) + .finish(), + Self::Symlink { target, metadata } => f + .debug_struct("Symlink") + .field("target", target) + .field("metadata", metadata) + .finish(), + Self::Fifo { content, metadata } => f + .debug_struct("Fifo") + .field("content_len", &content.len()) + .field("metadata", metadata) + .finish(), + } + } +} + /// A snapshot of the virtual filesystem state. /// /// Captures all files, directories, and symlinks. Can be serialized with serde @@ -423,6 +493,11 @@ impl InMemoryFs { FsEntry::Directory { .. } => { dir_count += 1; } + FsEntry::LazyFile { metadata, .. } => { + // Lazy files count by their declared metadata size + total_bytes += metadata.size; + file_count += 1; + } FsEntry::Symlink { .. } => { // THREAT[TM-DOS-045]: Symlinks count toward file count file_count += 1; @@ -517,7 +592,24 @@ impl InMemoryFs { /// # } /// ``` pub fn snapshot(&self) -> VfsSnapshot { - let entries = self.entries.read().unwrap(); + // Use write lock to materialize any lazy files before snapshotting + let mut entries = self.entries.write().unwrap(); + + // Materialize all lazy files + let lazy_paths: Vec = entries + .iter() + .filter(|(_, e)| matches!(e, FsEntry::LazyFile { .. })) + .map(|(p, _)| p.clone()) + .collect(); + for path in lazy_paths { + if let Some(FsEntry::LazyFile { loader, metadata }) = entries.remove(&path) { + let content = loader(); + let mut metadata = metadata; + metadata.size = content.len() as u64; + entries.insert(path, FsEntry::File { content, metadata }); + } + } + let mut files = Vec::new(); for (path, entry) in entries.iter() { @@ -531,6 +623,10 @@ impl InMemoryFs { mode: metadata.mode, }); } + FsEntry::LazyFile { .. } => { + // All lazy files were materialized above + unreachable!() + } FsEntry::Directory { metadata } => { files.push(VfsEntry { path: path.clone(), @@ -780,6 +876,78 @@ impl InMemoryFs { }, ); } + + /// Add a lazy file whose content is loaded on first read. + /// + /// The `loader` closure is called at most once when the file is first read. + /// If the file is overwritten before being read, the loader is never called. + /// `stat()` returns metadata without triggering the load. + /// + /// `size_hint` is used for metadata and resource-limit accounting before + /// the content is actually loaded. + /// + /// # Example + /// + /// ```rust + /// use bashkit::InMemoryFs; + /// use std::sync::Arc; + /// + /// let fs = InMemoryFs::new(); + /// fs.add_lazy_file("/data/large.bin", 1024, 0o644, Arc::new(|| { + /// vec![0u8; 1024] + /// })); + /// ``` + pub fn add_lazy_file( + &self, + path: impl AsRef, + size_hint: u64, + mode: u32, + loader: LazyLoader, + ) { + let path = Self::normalize_path(path.as_ref()); + + if self.limits.validate_path(&path).is_err() { + return; + } + + let mut entries = self.entries.write().unwrap(); + + // Ensure parent directories exist + if let Some(parent) = path.parent() { + let mut current = PathBuf::from("/"); + for component in parent.components().skip(1) { + current.push(component); + if !entries.contains_key(¤t) { + entries.insert( + current.clone(), + FsEntry::Directory { + metadata: Metadata { + file_type: FileType::Directory, + size: 0, + mode: 0o755, + modified: SystemTime::now(), + created: SystemTime::now(), + }, + }, + ); + } + } + } + + entries.insert( + path, + FsEntry::LazyFile { + loader, + metadata: Metadata { + file_type: FileType::File, + size: size_hint, + mode, + modified: SystemTime::now(), + created: SystemTime::now(), + }, + }, + ); + } } #[async_trait] @@ -818,13 +986,50 @@ impl FileSystem for InMemoryFs { return Ok(Self::generate_random_bytes()); } - let entries = self.entries.read().unwrap(); + // First try with a read lock for the common (non-lazy) case + { + let entries = self.entries.read().unwrap(); + match entries.get(&path) { + Some(FsEntry::File { content, .. }) | Some(FsEntry::Fifo { content, .. }) => { + return Ok(content.clone()); + } + Some(FsEntry::Directory { .. }) => { + return Err(IoError::other("is a directory").into()); + } + Some(FsEntry::Symlink { .. }) => { + return Err(IoError::new(ErrorKind::NotFound, "file not found").into()); + } + Some(FsEntry::LazyFile { .. }) => { + // Need write lock to materialize — fall through + } + None => { + return Err(IoError::new(ErrorKind::NotFound, "file not found").into()); + } + } + } + // Materialize lazy file: acquire write lock + let mut entries = self.entries.write().unwrap(); match entries.get(&path) { + Some(FsEntry::LazyFile { .. }) => { + // Extract loader, call it, replace entry + if let Some(FsEntry::LazyFile { loader, metadata }) = entries.remove(&path) { + let content = loader(); + let mut metadata = metadata; + metadata.size = content.len() as u64; + let result = content.clone(); + entries.insert(path, FsEntry::File { content, metadata }); + return Ok(result); + } + unreachable!() + } + // Another thread may have materialized it between lock releases Some(FsEntry::File { content, .. }) | Some(FsEntry::Fifo { content, .. }) => { - Ok(content.clone()) + return Ok(content.clone()); + } + Some(FsEntry::Directory { .. }) => { + return Err(IoError::other("is a directory").into()); } - Some(FsEntry::Directory { .. }) => Err(IoError::other("is a directory").into()), Some(FsEntry::Symlink { .. }) => { // Symlinks are intentionally not followed for security (TM-ESC-002, TM-DOS-011) Err(IoError::new(ErrorKind::NotFound, "file not found").into()) @@ -975,6 +1180,22 @@ impl FileSystem for InMemoryFs { ); return Ok(()); } + Some(FsEntry::LazyFile { .. }) => { + // Materialize lazy file before appending + if let Some(FsEntry::LazyFile { loader, metadata }) = entries.remove(&path) { + let loaded = loader(); + let mut metadata = metadata; + metadata.size = loaded.len() as u64; + entries.insert( + path.clone(), + FsEntry::File { + content: loaded, + metadata, + }, + ); + } + // Fall through to append logic below + } Some(FsEntry::File { .. } | FsEntry::Fifo { .. }) => { // Fall through to append logic below } @@ -1060,7 +1281,12 @@ impl FileSystem for InMemoryFs { Some(FsEntry::Directory { .. }) => { // Directory exists, continue to next component } - Some(FsEntry::File { .. } | FsEntry::Symlink { .. } | FsEntry::Fifo { .. }) => { + Some( + FsEntry::File { .. } + | FsEntry::LazyFile { .. } + | FsEntry::Symlink { .. } + | FsEntry::Fifo { .. }, + ) => { // File, symlink, or fifo exists at path - cannot create directory return Err(IoError::new(ErrorKind::AlreadyExists, "file exists").into()); } @@ -1144,7 +1370,12 @@ impl FileSystem for InMemoryFs { entries.remove(&path); } } - Some(FsEntry::File { .. } | FsEntry::Symlink { .. } | FsEntry::Fifo { .. }) => { + Some( + FsEntry::File { .. } + | FsEntry::LazyFile { .. } + | FsEntry::Symlink { .. } + | FsEntry::Fifo { .. }, + ) => { entries.remove(&path); } None => { @@ -1164,6 +1395,7 @@ impl FileSystem for InMemoryFs { match entries.get(&path) { Some(FsEntry::File { metadata, .. }) + | Some(FsEntry::LazyFile { metadata, .. }) | Some(FsEntry::Directory { metadata }) | Some(FsEntry::Symlink { metadata, .. }) | Some(FsEntry::Fifo { metadata, .. }) => Ok(metadata.clone()), @@ -1191,6 +1423,7 @@ impl FileSystem for InMemoryFs { let metadata = match entry { FsEntry::File { metadata, .. } + | FsEntry::LazyFile { metadata, .. } | FsEntry::Directory { metadata } | FsEntry::Symlink { metadata, .. } | FsEntry::Fifo { metadata, .. } => metadata.clone(), @@ -1321,6 +1554,7 @@ impl FileSystem for InMemoryFs { match entries.get_mut(&path) { Some(FsEntry::File { metadata, .. }) + | Some(FsEntry::LazyFile { metadata, .. }) | Some(FsEntry::Directory { metadata }) | Some(FsEntry::Symlink { metadata, .. }) | Some(FsEntry::Fifo { metadata, .. }) => { @@ -1974,4 +2208,85 @@ mod tests { .unwrap(); assert_eq!(content.len(), 8192); } + + #[tokio::test] + async fn test_lazy_file_read() { + let fs = InMemoryFs::new(); + let called = Arc::new(std::sync::atomic::AtomicBool::new(false)); + let called_clone = Arc::clone(&called); + fs.add_lazy_file( + "/tmp/lazy.txt", + 5, + 0o644, + Arc::new(move || { + called_clone.store(true, std::sync::atomic::Ordering::SeqCst); + b"hello".to_vec() + }), + ); + + // stat should not trigger loading + let meta = fs.stat(Path::new("/tmp/lazy.txt")).await.unwrap(); + assert_eq!(meta.file_type, FileType::File); + assert!(!called.load(std::sync::atomic::Ordering::SeqCst)); + + // read triggers loading + let content = fs.read_file(Path::new("/tmp/lazy.txt")).await.unwrap(); + assert_eq!(content, b"hello"); + assert!(called.load(std::sync::atomic::Ordering::SeqCst)); + } + + #[tokio::test] + async fn test_lazy_file_write_before_read_skips_loader() { + let fs = InMemoryFs::new(); + let called = Arc::new(std::sync::atomic::AtomicBool::new(false)); + let called_clone = Arc::clone(&called); + fs.add_lazy_file( + "/tmp/lazy.txt", + 5, + 0o644, + Arc::new(move || { + called_clone.store(true, std::sync::atomic::Ordering::SeqCst); + b"lazy".to_vec() + }), + ); + + // write before read replaces the lazy entry + fs.write_file(Path::new("/tmp/lazy.txt"), b"eager") + .await + .unwrap(); + let content = fs.read_file(Path::new("/tmp/lazy.txt")).await.unwrap(); + assert_eq!(content, b"eager"); + // loader was never called + assert!(!called.load(std::sync::atomic::Ordering::SeqCst)); + } + + #[tokio::test] + async fn test_lazy_file_exists_and_readdir() { + let fs = InMemoryFs::new(); + fs.add_lazy_file("/tmp/lazy.txt", 10, 0o644, Arc::new(|| b"content".to_vec())); + + assert!(fs.exists(Path::new("/tmp/lazy.txt")).await.unwrap()); + + let entries = fs.read_dir(Path::new("/tmp")).await.unwrap(); + let names: Vec<&str> = entries.iter().map(|e| e.name.as_str()).collect(); + assert!(names.contains(&"lazy.txt")); + } + + #[tokio::test] + async fn test_lazy_file_snapshot_materializes() { + let fs = InMemoryFs::new(); + fs.add_lazy_file("/tmp/lazy.txt", 6, 0o644, Arc::new(|| b"snappy".to_vec())); + + let snapshot = fs.snapshot(); + // After snapshot, the entry should be a regular file + let content = fs.read_file(Path::new("/tmp/lazy.txt")).await.unwrap(); + assert_eq!(content, b"snappy"); + + // Verify snapshot contains the file + let has_file = snapshot + .entries + .iter() + .any(|e| e.path == Path::new("/tmp/lazy.txt")); + assert!(has_file); + } } diff --git a/crates/bashkit/src/fs/mod.rs b/crates/bashkit/src/fs/mod.rs index f0469a48..8f59e999 100644 --- a/crates/bashkit/src/fs/mod.rs +++ b/crates/bashkit/src/fs/mod.rs @@ -408,7 +408,7 @@ mod traits; pub use backend::FsBackend; pub use limits::{FsLimitExceeded, FsLimits, FsUsage}; -pub use memory::{InMemoryFs, VfsSnapshot}; +pub use memory::{InMemoryFs, LazyLoader, VfsSnapshot}; pub use mountable::MountableFs; pub use overlay::OverlayFs; pub use posix::PosixFs; diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index 9b95a321..e8621251 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -421,9 +421,9 @@ pub use builtins::{Builtin, Context as BuiltinContext}; pub use error::{Error, Result}; pub use fs::{ DirEntry, FileSystem, FileSystemExt, FileType, FsBackend, FsLimitExceeded, FsLimits, FsUsage, - InMemoryFs, Metadata, MountableFs, OverlayFs, PosixFs, SearchCapabilities, SearchCapable, - SearchMatch, SearchProvider, SearchQuery, SearchResults, VfsSnapshot, normalize_path, - verify_filesystem_requirements, + InMemoryFs, LazyLoader, Metadata, MountableFs, OverlayFs, PosixFs, SearchCapabilities, + SearchCapable, SearchMatch, SearchProvider, SearchQuery, SearchResults, VfsSnapshot, + normalize_path, verify_filesystem_requirements, }; #[cfg(feature = "realfs")] pub use fs::{RealFs, RealFsMode}; @@ -973,6 +973,13 @@ struct MountedFile { mode: u32, } +struct MountedLazyFile { + path: PathBuf, + size_hint: u64, + mode: u32, + loader: LazyLoader, +} + /// A real host directory to mount in the VFS during builder construction. #[cfg(feature = "realfs")] struct MountedRealDir { @@ -1001,6 +1008,8 @@ pub struct BashBuilder { custom_builtins: HashMap>, /// Files to mount in the virtual filesystem mounted_files: Vec, + /// Lazy files to mount (loaded on first read) + mounted_lazy_files: Vec, /// Network allowlist for curl/wget builtins #[cfg(feature = "http_client")] network_allowlist: Option, @@ -1491,6 +1500,46 @@ impl BashBuilder { self } + /// Mount a lazy file whose content is loaded on first read. + /// + /// The `loader` closure is called at most once when the file is first read. + /// If the file is overwritten before being read, the loader is never called. + /// `stat()` returns metadata using `size_hint` without triggering the load. + /// + /// # Example + /// + /// ```rust + /// use bashkit::Bash; + /// use std::sync::Arc; + /// + /// # #[tokio::main] + /// # async fn main() -> bashkit::Result<()> { + /// let mut bash = Bash::builder() + /// .mount_lazy("/data/large.csv", 1024, Arc::new(|| { + /// b"id,name\n1,Alice\n".to_vec() + /// })) + /// .build(); + /// + /// let result = bash.exec("cat /data/large.csv").await?; + /// assert_eq!(result.stdout, "id,name\n1,Alice\n"); + /// # Ok(()) + /// # } + /// ``` + pub fn mount_lazy( + mut self, + path: impl Into, + size_hint: u64, + loader: LazyLoader, + ) -> Self { + self.mounted_lazy_files.push(MountedLazyFile { + path: path.into(), + size_hint, + mode: 0o644, + loader, + }); + self + } + /// Mount a real host directory as a readonly overlay at the VFS root. /// /// Files from `host_path` become visible at the same paths inside the VFS. @@ -1644,16 +1693,21 @@ impl BashBuilder { #[cfg(feature = "realfs")] let base_fs = Self::apply_real_mounts(&self.real_mounts, base_fs); - // Layer 2: If there are mounted text files, wrap in an OverlayFs - let base_fs: Arc = if self.mounted_files.is_empty() { - base_fs - } else { + // Layer 2: If there are mounted text/lazy files, wrap in an OverlayFs + let has_mounts = !self.mounted_files.is_empty() || !self.mounted_lazy_files.is_empty(); + let base_fs: Arc = if has_mounts { let overlay = OverlayFs::new(base_fs); - // Add mounted files to the overlay layer for mf in &self.mounted_files { overlay.upper().add_file(&mf.path, &mf.content, mf.mode); } + for lf in self.mounted_lazy_files { + overlay + .upper() + .add_lazy_file(&lf.path, lf.size_hint, lf.mode, lf.loader); + } Arc::new(overlay) + } else { + base_fs }; // Layer 3: Wrap in MountableFs for post-build live mount/unmount