From a12432e6f67bb7fc370f15bf9ea27ef9efb749dc Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Wed, 25 Feb 2026 16:49:21 -0800 Subject: [PATCH 1/9] wip oxen lfs git integration --- oxen-rust/src/cli/src/cmd.rs | 3 + oxen-rust/src/cli/src/cmd/lfs.rs | 103 ++++++++ oxen-rust/src/cli/src/cmd/lfs/clean.rs | 53 +++++ oxen-rust/src/cli/src/cmd/lfs/env.rs | 53 +++++ .../src/cli/src/cmd/lfs/filter_process.rs | 31 +++ oxen-rust/src/cli/src/cmd/lfs/init.rs | 90 +++++++ oxen-rust/src/cli/src/cmd/lfs/install.rs | 39 +++ oxen-rust/src/cli/src/cmd/lfs/pull.rs | 38 +++ oxen-rust/src/cli/src/cmd/lfs/push.rs | 33 +++ oxen-rust/src/cli/src/cmd/lfs/smudge.rs | 55 +++++ oxen-rust/src/cli/src/cmd/lfs/status.rs | 53 +++++ oxen-rust/src/cli/src/cmd/lfs/track.rs | 51 ++++ oxen-rust/src/cli/src/cmd/lfs/untrack.rs | 38 +++ oxen-rust/src/cli/src/main.rs | 1 + oxen-rust/src/lib/src/constants.rs | 6 + oxen-rust/src/lib/src/lfs.rs | 9 + oxen-rust/src/lib/src/lfs/config.rs | 60 +++++ oxen-rust/src/lib/src/lfs/filter.rs | 143 +++++++++++ oxen-rust/src/lib/src/lfs/filter_process.rs | 224 ++++++++++++++++++ oxen-rust/src/lib/src/lfs/gitattributes.rs | 149 ++++++++++++ oxen-rust/src/lib/src/lfs/hooks.rs | 173 ++++++++++++++ oxen-rust/src/lib/src/lfs/install.rs | 63 +++++ oxen-rust/src/lib/src/lfs/pointer.rs | 161 +++++++++++++ oxen-rust/src/lib/src/lfs/status.rs | 121 ++++++++++ oxen-rust/src/lib/src/lfs/sync.rs | 89 +++++++ oxen-rust/src/lib/src/lib.rs | 1 + 26 files changed, 1840 insertions(+) create mode 100644 oxen-rust/src/cli/src/cmd/lfs.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/clean.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/env.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/filter_process.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/init.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/install.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/pull.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/push.rs create mode 100644 
oxen-rust/src/cli/src/cmd/lfs/smudge.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/status.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/track.rs create mode 100644 oxen-rust/src/cli/src/cmd/lfs/untrack.rs create mode 100644 oxen-rust/src/lib/src/lfs.rs create mode 100644 oxen-rust/src/lib/src/lfs/config.rs create mode 100644 oxen-rust/src/lib/src/lfs/filter.rs create mode 100644 oxen-rust/src/lib/src/lfs/filter_process.rs create mode 100644 oxen-rust/src/lib/src/lfs/gitattributes.rs create mode 100644 oxen-rust/src/lib/src/lfs/hooks.rs create mode 100644 oxen-rust/src/lib/src/lfs/install.rs create mode 100644 oxen-rust/src/lib/src/lfs/pointer.rs create mode 100644 oxen-rust/src/lib/src/lfs/status.rs create mode 100644 oxen-rust/src/lib/src/lfs/sync.rs diff --git a/oxen-rust/src/cli/src/cmd.rs b/oxen-rust/src/cli/src/cmd.rs index db35f679d..77827fd71 100644 --- a/oxen-rust/src/cli/src/cmd.rs +++ b/oxen-rust/src/cli/src/cmd.rs @@ -51,6 +51,9 @@ pub use info::InfoCmd; pub mod init; pub use init::InitCmd; +pub mod lfs; +pub use lfs::LfsCmd; + pub mod load; pub use load::LoadCmd; diff --git a/oxen-rust/src/cli/src/cmd/lfs.rs b/oxen-rust/src/cli/src/cmd/lfs.rs new file mode 100644 index 000000000..712af7175 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs.rs @@ -0,0 +1,103 @@ +pub mod clean; +pub use clean::LfsCleanCmd; + +pub mod env; +pub use env::LfsEnvCmd; + +pub mod filter_process; +pub use filter_process::LfsFilterProcessCmd; + +pub mod init; +pub use init::LfsInitCmd; + +pub mod install; +pub use install::LfsInstallCmd; + +pub mod pull; +pub use pull::LfsPullCmd; + +pub mod push; +pub use push::LfsPushCmd; + +pub mod smudge; +pub use smudge::LfsSmudgeCmd; + +pub mod status; +pub use status::LfsStatusCmd; + +pub mod track; +pub use track::LfsTrackCmd; + +pub mod untrack; +pub use untrack::LfsUntrackCmd; + +use async_trait::async_trait; +use clap::Command; + +use liboxen::error::OxenError; +use std::collections::HashMap; + +use crate::cmd::RunCmd; + 
+pub const NAME: &str = "lfs"; +pub struct LfsCmd; + +#[async_trait] +impl RunCmd for LfsCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + let mut command = Command::new(NAME) + .about("Oxen large file storage (Git LFS replacement)") + .subcommand_required(true) + .arg_required_else_help(true); + + let sub_commands = Self::get_subcommands(); + for cmd in sub_commands.values() { + command = command.subcommand(cmd.args()); + } + command + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let sub_commands = Self::get_subcommands(); + if let Some((name, sub_matches)) = args.subcommand() { + let Some(cmd) = sub_commands.get(name) else { + eprintln!("Unknown lfs subcommand {name}"); + return Err(OxenError::basic_str(format!( + "Unknown lfs subcommand {name}" + ))); + }; + + tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on(cmd.run(sub_matches)) + })?; + } + Ok(()) + } +} + +impl LfsCmd { + fn get_subcommands() -> HashMap> { + let commands: Vec> = vec![ + Box::new(LfsCleanCmd), + Box::new(LfsEnvCmd), + Box::new(LfsFilterProcessCmd), + Box::new(LfsInitCmd), + Box::new(LfsInstallCmd), + Box::new(LfsPullCmd), + Box::new(LfsPushCmd), + Box::new(LfsSmudgeCmd), + Box::new(LfsStatusCmd), + Box::new(LfsTrackCmd), + Box::new(LfsUntrackCmd), + ]; + let mut runners: HashMap> = HashMap::new(); + for cmd in commands { + runners.insert(cmd.name().to_string(), cmd); + } + runners + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/clean.rs b/oxen-rust/src/cli/src/cmd/lfs/clean.rs new file mode 100644 index 000000000..05f3f623b --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/clean.rs @@ -0,0 +1,53 @@ +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "clean"; +pub struct LfsCleanCmd; + +#[async_trait] +impl RunCmd for LfsCleanCmd { + fn name(&self) -> 
&str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Clean filter for a single file (invoked by Git)") + .arg(Arg::new("separator").long("").hide(true)) + .arg( + Arg::new("file") + .help("Path to the file being cleaned") + .required(false), + ) + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let versions_dir = repo_root.join(OXEN_HIDDEN_DIR).join("versions"); + + // Read content from stdin. + let content = { + use std::io::Read; + let mut buf = Vec::new(); + std::io::stdin().read_to_end(&mut buf)?; + buf + }; + + let result = lfs::filter::clean(&versions_dir, &content).await?; + + // Write result to stdout. + { + use std::io::Write; + std::io::stdout().write_all(&result)?; + std::io::stdout().flush()?; + } + + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/env.rs b/oxen-rust/src/cli/src/cmd/lfs/env.rs new file mode 100644 index 000000000..7241e716d --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/env.rs @@ -0,0 +1,53 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "env"; +pub struct LfsEnvCmd; + +#[async_trait] +impl RunCmd for LfsEnvCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about("Show Oxen LFS environment and diagnostic info") + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + println!("oxen lfs environment"); + println!(" oxen version: {}", liboxen::constants::OXEN_VERSION); + + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + + if oxen_dir.exists() { + let config = lfs::config::LfsConfig::load(&oxen_dir)?; + println!( + " remote: {}", + config.remote_url.as_deref().unwrap_or("(not set)") + ); + println!(" versions dir: {}", oxen_dir.join("versions").display()); + 
+ let patterns = lfs::gitattributes::list_tracked_patterns(&repo_root)?; + if patterns.is_empty() { + println!(" tracked patterns: (none)"); + } else { + println!(" tracked patterns:"); + for p in &patterns { + println!(" {p}"); + } + } + } else { + println!(" Oxen LFS not initialized in this repository."); + } + + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/filter_process.rs b/oxen-rust/src/cli/src/cmd/lfs/filter_process.rs new file mode 100644 index 000000000..623f0a76a --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/filter_process.rs @@ -0,0 +1,31 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "filter-process"; +pub struct LfsFilterProcessCmd; + +#[async_trait] +impl RunCmd for LfsFilterProcessCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about("Git long-running filter process (invoked by Git, not by users)") + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let versions_dir = repo_root.join(OXEN_HIDDEN_DIR).join("versions"); + + // Run the blocking filter process loop. 
+ lfs::filter_process::run_filter_process(&versions_dir)?; + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/init.rs b/oxen-rust/src/cli/src/cmd/lfs/init.rs new file mode 100644 index 000000000..36b07fed1 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/init.rs @@ -0,0 +1,90 @@ +use std::path::Path; + +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "init"; +pub struct LfsInitCmd; + +#[async_trait] +impl RunCmd for LfsInitCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Initialize Oxen LFS in the current Git repository") + .arg( + Arg::new("remote") + .long("remote") + .help("Oxen remote URL for push/pull of large files"), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + + // Verify we are in a git repository. + let git_dir = repo_root.join(".git"); + if !git_dir.exists() { + return Err(OxenError::basic_str( + "Not a git repository. Run `git init` first.", + )); + } + + // Create .oxen/ directory. + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + std::fs::create_dir_all(&oxen_dir)?; + + // Create versions/ directory. + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir)?; + + // Save LFS config. + let remote_url = args.get_one::("remote").cloned(); + let config = lfs::config::LfsConfig { remote_url }; + config.save(&oxen_dir)?; + + // Install hooks. + lfs::hooks::install_hooks(&git_dir)?; + + // Add .oxen/ to .gitignore. + ensure_gitignore(&repo_root)?; + + println!("Oxen LFS initialized in {}", repo_root.display()); + Ok(()) + } +} + +/// Ensure `.oxen/` is listed in `.gitignore`. 
+fn ensure_gitignore(repo_root: &Path) -> Result<(), OxenError> { + let gitignore = repo_root.join(".gitignore"); + let pattern = format!("{OXEN_HIDDEN_DIR}/"); + + let existing = if gitignore.exists() { + std::fs::read_to_string(&gitignore)? + } else { + String::new() + }; + + if existing.lines().any(|l| l.trim() == pattern) { + return Ok(()); + } + + let mut content = existing; + if !content.is_empty() && !content.ends_with('\n') { + content.push('\n'); + } + content.push_str(&pattern); + content.push('\n'); + + std::fs::write(&gitignore, content)?; + Ok(()) +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/install.rs b/oxen-rust/src/cli/src/cmd/lfs/install.rs new file mode 100644 index 000000000..2c9c64d21 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/install.rs @@ -0,0 +1,39 @@ +use async_trait::async_trait; +use clap::{Arg, ArgAction, Command}; + +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "install"; +pub struct LfsInstallCmd; + +#[async_trait] +impl RunCmd for LfsInstallCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Configure Git's global filter driver for Oxen LFS") + .arg( + Arg::new("uninstall") + .long("uninstall") + .help("Remove the global filter driver configuration") + .action(ArgAction::SetTrue), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + if args.get_flag("uninstall") { + lfs::install::uninstall_global_filter()?; + println!("Oxen LFS global filter uninstalled."); + } else { + lfs::install::install_global_filter()?; + println!("Oxen LFS global filter installed."); + } + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/pull.rs b/oxen-rust/src/cli/src/cmd/lfs/pull.rs new file mode 100644 index 000000000..0fe6939ca --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/pull.rs @@ -0,0 +1,38 @@ +use async_trait::async_trait; +use clap::{Arg, ArgAction, Command}; + +use 
liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "pull"; +pub struct LfsPullCmd; + +#[async_trait] +impl RunCmd for LfsPullCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Pull and restore large files from Oxen remote or local store") + .arg( + Arg::new("local") + .long("local") + .help("Only restore from the local .oxen/versions/ store (no network)") + .action(ArgAction::SetTrue), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + let local_only = args.get_flag("local"); + + lfs::sync::pull_from_remote(&repo_root, &oxen_dir, local_only).await?; + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/push.rs b/oxen-rust/src/cli/src/cmd/lfs/push.rs new file mode 100644 index 000000000..1b086f990 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/push.rs @@ -0,0 +1,33 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "push"; +pub struct LfsPushCmd; + +#[async_trait] +impl RunCmd for LfsPushCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about("Push large files to the configured Oxen remote") + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + + // Collect remaining args that were passed by the pre-push hook. 
+ let hook_args: Vec = std::env::args().collect(); + + lfs::sync::push_to_remote(&repo_root, &oxen_dir, &hook_args).await?; + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/smudge.rs b/oxen-rust/src/cli/src/cmd/lfs/smudge.rs new file mode 100644 index 000000000..e1c007c67 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/smudge.rs @@ -0,0 +1,55 @@ +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "smudge"; +pub struct LfsSmudgeCmd; + +#[async_trait] +impl RunCmd for LfsSmudgeCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Smudge filter for a single file (invoked by Git)") + .arg(Arg::new("separator").long("").hide(true)) + .arg( + Arg::new("file") + .help("Path to the file being smudged") + .required(false), + ) + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + let versions_dir = oxen_dir.join("versions"); + let config = lfs::config::LfsConfig::load(&oxen_dir)?; + + // Read pointer data from stdin. + let pointer_data = { + use std::io::Read; + let mut buf = Vec::new(); + std::io::stdin().read_to_end(&mut buf)?; + buf + }; + + let result = lfs::filter::smudge(&versions_dir, &config, &pointer_data).await?; + + // Write result to stdout. 
+ { + use std::io::Write; + std::io::stdout().write_all(&result)?; + std::io::stdout().flush()?; + } + + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/status.rs b/oxen-rust/src/cli/src/cmd/lfs/status.rs new file mode 100644 index 000000000..e5d6be51c --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/status.rs @@ -0,0 +1,53 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "status"; +pub struct LfsStatusCmd; + +#[async_trait] +impl RunCmd for LfsStatusCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about("Show status of Oxen LFS tracked files") + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + let versions_dir = oxen_dir.join("versions"); + + if !versions_dir.exists() { + println!("Oxen LFS not initialized. 
Run `oxen lfs init` first."); + return Ok(()); + } + + let statuses = lfs::status::get_status(&repo_root, &versions_dir).await?; + + if statuses.is_empty() { + println!("No LFS tracked files found."); + return Ok(()); + } + + for s in &statuses { + let local_indicator = if s.local { "local" } else { "missing" }; + println!( + "{} ({}, {} bytes, {})", + s.path.display(), + s.pointer.oid, + s.pointer.size, + local_indicator, + ); + } + + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/track.rs b/oxen-rust/src/cli/src/cmd/lfs/track.rs new file mode 100644 index 000000000..94e3d4a95 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/track.rs @@ -0,0 +1,51 @@ +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "track"; +pub struct LfsTrackCmd; + +#[async_trait] +impl RunCmd for LfsTrackCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Track a file pattern with Oxen LFS") + .arg( + Arg::new("pattern") + .help("File glob pattern to track (e.g. \"*.bin\", \"datasets/**\")") + .required(false), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + + match args.get_one::("pattern") { + Some(pattern) => { + lfs::gitattributes::track_pattern(&repo_root, pattern)?; + println!("Tracking \"{pattern}\""); + } + None => { + // No pattern: list currently tracked patterns. 
+ let patterns = lfs::gitattributes::list_tracked_patterns(&repo_root)?; + if patterns.is_empty() { + println!("No patterns tracked by Oxen LFS."); + } else { + println!("Patterns tracked by Oxen LFS:"); + for p in &patterns { + println!(" {p}"); + } + } + } + } + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/untrack.rs b/oxen-rust/src/cli/src/cmd/lfs/untrack.rs new file mode 100644 index 000000000..3a1c0f046 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/untrack.rs @@ -0,0 +1,38 @@ +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "untrack"; +pub struct LfsUntrackCmd; + +#[async_trait] +impl RunCmd for LfsUntrackCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Stop tracking a file pattern with Oxen LFS") + .arg( + Arg::new("pattern") + .help("File glob pattern to untrack") + .required(true), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let pattern = args + .get_one::("pattern") + .ok_or_else(|| OxenError::basic_str("pattern is required"))?; + + lfs::gitattributes::untrack_pattern(&repo_root, pattern)?; + println!("Untracking \"{pattern}\""); + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/main.rs b/oxen-rust/src/cli/src/main.rs index 71467c1c2..44bff438c 100644 --- a/oxen-rust/src/cli/src/main.rs +++ b/oxen-rust/src/cli/src/main.rs @@ -62,6 +62,7 @@ async fn async_main() -> ExitCode { Box::new(cmd::EmbeddingsCmd), Box::new(cmd::InfoCmd), Box::new(cmd::InitCmd), + Box::new(cmd::LfsCmd), Box::new(cmd::LoadCmd), Box::new(cmd::LogCmd), Box::new(cmd::LsCmd), diff --git a/oxen-rust/src/lib/src/constants.rs b/oxen-rust/src/lib/src/constants.rs index 244a0c7ed..40611b218 100644 --- a/oxen-rust/src/lib/src/constants.rs +++ b/oxen-rust/src/lib/src/constants.rs @@ -273,3 +273,9 @@ pub fn chunk_size() -> 
u64 { // Oxen request Id pub const OXEN_REQUEST_ID: &str = "x-oxen-request-id"; + +// LFS +/// Name of the LFS config file inside .oxen/ +pub const LFS_CONFIG_FILENAME: &str = "lfs.toml"; +/// Subdirectory under .oxen/ where large file versions are cached +pub const LFS_VERSIONS_DIR: &str = "versions"; diff --git a/oxen-rust/src/lib/src/lfs.rs b/oxen-rust/src/lib/src/lfs.rs new file mode 100644 index 000000000..33f16afe9 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs.rs @@ -0,0 +1,9 @@ +pub mod config; +pub mod filter; +pub mod filter_process; +pub mod gitattributes; +pub mod hooks; +pub mod install; +pub mod pointer; +pub mod status; +pub mod sync; diff --git a/oxen-rust/src/lib/src/lfs/config.rs b/oxen-rust/src/lib/src/lfs/config.rs new file mode 100644 index 000000000..4be073a1c --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/config.rs @@ -0,0 +1,60 @@ +use std::path::Path; + +use serde::{Deserialize, Serialize}; + +use crate::error::OxenError; + +const LFS_CONFIG_FILENAME: &str = "lfs.toml"; + +/// Configuration stored in `.oxen/lfs.toml` within a Git repository. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct LfsConfig { + /// Optional Oxen remote URL for push/pull of large file content. + pub remote_url: Option, +} + +impl LfsConfig { + /// Load from `/lfs.toml`. Returns defaults if the file does not exist. + pub fn load(oxen_dir: &Path) -> Result { + let path = oxen_dir.join(LFS_CONFIG_FILENAME); + if !path.exists() { + return Ok(Self::default()); + } + let text = std::fs::read_to_string(&path)?; + let config: LfsConfig = toml::from_str(&text).map_err(OxenError::TomlDe)?; + Ok(config) + } + + /// Persist to `/lfs.toml`. 
+ pub fn save(&self, oxen_dir: &Path) -> Result<(), OxenError> { + let path = oxen_dir.join(LFS_CONFIG_FILENAME); + let text = toml::to_string_pretty(self).map_err(OxenError::TomlSer)?; + std::fs::write(&path, text)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_config_save_and_load() { + let tmp = TempDir::new().unwrap(); + let cfg = LfsConfig { + remote_url: Some("https://hub.oxen.ai/user/repo".to_string()), + }; + cfg.save(tmp.path()).unwrap(); + + let loaded = LfsConfig::load(tmp.path()).unwrap(); + assert_eq!(loaded.remote_url, cfg.remote_url); + } + + #[test] + fn test_config_load_defaults_when_missing() { + let tmp = TempDir::new().unwrap(); + let cfg = LfsConfig::load(tmp.path()).unwrap(); + assert!(cfg.remote_url.is_none()); + } +} diff --git a/oxen-rust/src/lib/src/lfs/filter.rs b/oxen-rust/src/lib/src/lfs/filter.rs new file mode 100644 index 000000000..06b3fa50b --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/filter.rs @@ -0,0 +1,143 @@ +use std::path::Path; + +use crate::error::OxenError; +use crate::lfs::config::LfsConfig; +use crate::lfs::pointer::PointerFile; +use crate::storage::version_store::VersionStore; +use crate::storage::LocalVersionStore; +use crate::util::hasher; + +/// Clean filter: hash content, store in version store, return pointer bytes. +/// +/// If the input is already a valid pointer it is returned unchanged (idempotent). +pub async fn clean(versions_dir: &Path, content: &[u8]) -> Result, OxenError> { + // Idempotent: don't re-clean a pointer. + if PointerFile::is_pointer(content) { + return Ok(content.to_vec()); + } + + let hash = hasher::hash_buffer(content); + + let store = LocalVersionStore::new(versions_dir); + store.init().await?; + store.store_version(&hash, content).await?; + + let pointer = PointerFile::new(&hash, content.len() as u64); + Ok(pointer.encode()) +} + +/// Smudge filter: parse pointer, look up content in version store, return content. 
+/// +/// Strategy: local store first, then remote fetch (if configured), then fall back +/// to returning the pointer bytes unchanged with a warning. +pub async fn smudge( + versions_dir: &Path, + _lfs_config: &LfsConfig, + pointer_data: &[u8], +) -> Result, OxenError> { + // Not a pointer — return data as-is. + let pointer = match PointerFile::decode(pointer_data) { + Some(p) => p, + None => return Ok(pointer_data.to_vec()), + }; + + let store = LocalVersionStore::new(versions_dir); + + // 1. Try local store. + if store.version_exists(&pointer.oid).await? { + return store.get_version(&pointer.oid).await; + } + + // 2. TODO (Phase 3): fetch from Oxen remote with timeout. + + // 3. Fallback — return pointer bytes and warn. + log::warn!( + "oxen lfs smudge: content for {} not available locally; run `oxen lfs pull`", + pointer.oid, + ); + Ok(pointer_data.to_vec()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn test_clean_stores_and_returns_pointer() { + let tmp = TempDir::new().unwrap(); + let versions_dir = tmp.path().join("versions"); + + let content = b"hello world, this is a large file"; + let result = clean(&versions_dir, content).await.unwrap(); + + // Result should be a valid pointer. + let ptr = PointerFile::decode(&result).expect("should be a pointer"); + assert_eq!(ptr.size, content.len() as u64); + + // Content should be in the store. + let store = LocalVersionStore::new(&versions_dir); + assert!(store.version_exists(&ptr.oid).await.unwrap()); + let stored = store.get_version(&ptr.oid).await.unwrap(); + assert_eq!(stored, content); + } + + #[tokio::test] + async fn test_clean_is_idempotent() { + let tmp = TempDir::new().unwrap(); + let versions_dir = tmp.path().join("versions"); + + let content = b"some content"; + let pointer_bytes = clean(&versions_dir, content).await.unwrap(); + + // Cleaning the pointer again should return it unchanged. 
+ let double = clean(&versions_dir, &pointer_bytes).await.unwrap(); + assert_eq!(pointer_bytes, double); + } + + #[tokio::test] + async fn test_smudge_restores_content() { + let tmp = TempDir::new().unwrap(); + let versions_dir = tmp.path().join("versions"); + let config = LfsConfig::default(); + + let content = b"restore me"; + let pointer_bytes = clean(&versions_dir, content).await.unwrap(); + + let restored = smudge(&versions_dir, &config, &pointer_bytes) + .await + .unwrap(); + assert_eq!(restored, content); + } + + #[tokio::test] + async fn test_smudge_passthrough_non_pointer() { + let tmp = TempDir::new().unwrap(); + let versions_dir = tmp.path().join("versions"); + let config = LfsConfig::default(); + + let data = b"not a pointer"; + let result = smudge(&versions_dir, &config, data).await.unwrap(); + assert_eq!(result, data); + } + + #[tokio::test] + async fn test_smudge_fallback_when_missing() { + let tmp = TempDir::new().unwrap(); + let versions_dir = tmp.path().join("versions"); + let config = LfsConfig::default(); + + // Fabricate a pointer whose content is NOT in the store. + let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 999); + let pointer_bytes = ptr.encode(); + + // Create the versions dir so version_exists doesn't fail. + std::fs::create_dir_all(&versions_dir).unwrap(); + + let result = smudge(&versions_dir, &config, &pointer_bytes) + .await + .unwrap(); + // Falls back to returning the pointer bytes. + assert_eq!(result, pointer_bytes); + } +} diff --git a/oxen-rust/src/lib/src/lfs/filter_process.rs b/oxen-rust/src/lib/src/lfs/filter_process.rs new file mode 100644 index 000000000..cf811ba3c --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/filter_process.rs @@ -0,0 +1,224 @@ +use std::io::{self, BufReader, BufWriter, Write}; +use std::path::Path; + +use crate::error::OxenError; +use crate::lfs::config::LfsConfig; +use crate::lfs::filter; + +/// pkt-line helpers for the Git long-running filter protocol. 
+pub mod pkt_line { + use std::io::{self, BufRead, Write}; + + const MAX_PKT_PAYLOAD: usize = 65516; + + /// Write a pkt-line text packet (adds newline automatically). + pub fn write_text(w: &mut impl Write, text: &str) -> io::Result<()> { + let payload = format!("{text}\n"); + let len = payload.len() + 4; // 4-byte length prefix + write!(w, "{len:04x}{payload}")?; + Ok(()) + } + + /// Write binary data as one or more pkt-line packets. + pub fn write_binary(w: &mut impl Write, data: &[u8]) -> io::Result<()> { + for chunk in data.chunks(MAX_PKT_PAYLOAD) { + let len = chunk.len() + 4; + write!(w, "{len:04x}")?; + w.write_all(chunk)?; + } + Ok(()) + } + + /// Write a flush packet (0000). + pub fn write_flush(w: &mut impl Write) -> io::Result<()> { + w.write_all(b"0000")?; + Ok(()) + } + + /// Read one pkt-line packet. Returns `None` on flush (0000) or EOF. + pub fn read_packet(r: &mut impl BufRead) -> io::Result>> { + let mut len_buf = [0u8; 4]; + match r.read_exact(&mut len_buf) { + Ok(()) => {} + Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), + Err(e) => return Err(e), + } + + let len_str = std::str::from_utf8(&len_buf) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid pkt-line length"))?; + + let len = usize::from_str_radix(len_str, 16) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid pkt-line hex"))?; + + if len == 0 { + // Flush packet. + return Ok(None); + } + + if len < 4 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "pkt-line length too small", + )); + } + + let payload_len = len - 4; + let mut payload = vec![0u8; payload_len]; + r.read_exact(&mut payload)?; + Ok(Some(payload)) + } + + /// Read all packets until flush, concatenating their payloads. + pub fn read_until_flush(r: &mut impl BufRead) -> io::Result> { + let mut result = Vec::new(); + while let Some(pkt) = read_packet(r)? { + result.extend_from_slice(&pkt); + } + Ok(result) + } + + /// Read text key=value pairs until flush. 
+ pub fn read_text_pairs_until_flush(r: &mut impl BufRead) -> io::Result> { + let mut pairs = Vec::new(); + while let Some(pkt) = read_packet(r)? { + let text = String::from_utf8_lossy(&pkt); + let text = text.trim_end_matches('\n'); + if let Some((key, value)) = text.split_once('=') { + pairs.push((key.to_string(), value.to_string())); + } + } + Ok(pairs) + } +} + +/// Run the long-running Git filter process on stdin/stdout. +/// +/// This implements the protocol described in `gitattributes(5)` under +/// "Long Running Filter Process". +pub fn run_filter_process(versions_dir: &Path) -> Result<(), OxenError> { + let stdin = io::stdin(); + let stdout = io::stdout(); + let mut reader = BufReader::new(stdin.lock()); + let mut writer = BufWriter::new(stdout.lock()); + + let lfs_config = LfsConfig::load(versions_dir.parent().unwrap_or(Path::new(".")))?; + + // --- Handshake --- + // Read welcome message. + let welcome = pkt_line::read_until_flush(&mut reader)?; + let welcome_str = String::from_utf8_lossy(&welcome); + if !welcome_str.contains("git-filter-client") { + return Err(OxenError::basic_str("expected git-filter-client handshake")); + } + + // Read version. + let _version = pkt_line::read_until_flush(&mut reader)?; + + // Send our welcome + version. + pkt_line::write_text(&mut writer, "git-filter-server")?; + pkt_line::write_flush(&mut writer)?; + pkt_line::write_text(&mut writer, "version=2")?; + pkt_line::write_flush(&mut writer)?; + writer.flush()?; + + // Read capabilities. + let _caps = pkt_line::read_text_pairs_until_flush(&mut reader)?; + + // Advertise our capabilities. + pkt_line::write_text(&mut writer, "capability=clean")?; + pkt_line::write_text(&mut writer, "capability=smudge")?; + pkt_line::write_flush(&mut writer)?; + writer.flush()?; + + // Build a tokio runtime for async version-store operations. 
+ let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(|e| OxenError::basic_str(format!("failed to build tokio runtime: {e}")))?; + + // --- Per-file loop --- + loop { + // Read command + pathname (key=value pairs until flush). + let pairs = match pkt_line::read_text_pairs_until_flush(&mut reader) { + Ok(p) if p.is_empty() => break, // EOF / no more commands + Ok(p) => p, + Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break, + Err(e) => return Err(OxenError::IO(e)), + }; + + let command = pairs + .iter() + .find(|(k, _)| k == "command") + .map(|(_, v)| v.as_str()) + .unwrap_or(""); + + // Read content until flush. + let content = pkt_line::read_until_flush(&mut reader)?; + + let result = match command { + "clean" => rt.block_on(filter::clean(versions_dir, &content))?, + "smudge" => rt.block_on(filter::smudge(versions_dir, &lfs_config, &content))?, + other => { + log::warn!("oxen lfs filter-process: unknown command '{other}', passing through"); + content + } + }; + + // Write status=success, flush, content, flush, flush. 
+ pkt_line::write_text(&mut writer, "status=success")?; + pkt_line::write_flush(&mut writer)?; + pkt_line::write_binary(&mut writer, &result)?; + pkt_line::write_flush(&mut writer)?; + pkt_line::write_flush(&mut writer)?; + writer.flush()?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::pkt_line::*; + use std::io::Cursor; + + #[test] + fn test_pkt_line_roundtrip_text() { + let mut buf = Vec::new(); + write_text(&mut buf, "hello").unwrap(); + write_flush(&mut buf).unwrap(); + + let mut reader = Cursor::new(buf); + let pkt = read_packet(&mut reader).unwrap().unwrap(); + assert_eq!(String::from_utf8_lossy(&pkt), "hello\n"); + + // Next read should be flush => None + let flush = read_packet(&mut reader).unwrap(); + assert!(flush.is_none()); + } + + #[test] + fn test_pkt_line_binary() { + let data = vec![0u8; 100]; + let mut buf = Vec::new(); + write_binary(&mut buf, &data).unwrap(); + write_flush(&mut buf).unwrap(); + + let mut reader = Cursor::new(buf); + let result = read_until_flush(&mut reader).unwrap(); + assert_eq!(result, data); + } + + #[test] + fn test_read_text_pairs() { + let mut buf = Vec::new(); + write_text(&mut buf, "command=clean").unwrap(); + write_text(&mut buf, "pathname=test.bin").unwrap(); + write_flush(&mut buf).unwrap(); + + let mut reader = Cursor::new(buf); + let pairs = read_text_pairs_until_flush(&mut reader).unwrap(); + assert_eq!(pairs.len(), 2); + assert_eq!(pairs[0], ("command".to_string(), "clean".to_string())); + assert_eq!(pairs[1], ("pathname".to_string(), "test.bin".to_string())); + } +} diff --git a/oxen-rust/src/lib/src/lfs/gitattributes.rs b/oxen-rust/src/lib/src/lfs/gitattributes.rs new file mode 100644 index 000000000..fad1fc12d --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/gitattributes.rs @@ -0,0 +1,149 @@ +use std::path::Path; + +use crate::error::OxenError; + +const GITATTRIBUTES: &str = ".gitattributes"; + +/// The filter/diff/merge attributes appended to each tracked pattern. 
+const ATTR_SUFFIX: &str = "filter=oxen diff=oxen merge=oxen -text"; + +/// Format a .gitattributes line for a given pattern. +fn format_line(pattern: &str) -> String { + format!("{pattern} {ATTR_SUFFIX}") +} + +/// Add a pattern to `.gitattributes` (idempotent — skips if already present). +pub fn track_pattern(repo_root: &Path, pattern: &str) -> Result<(), OxenError> { + let ga_path = repo_root.join(GITATTRIBUTES); + let line = format_line(pattern); + + let existing = if ga_path.exists() { + std::fs::read_to_string(&ga_path)? + } else { + String::new() + }; + + // Already tracked? + if existing.lines().any(|l| l.trim() == line.trim()) { + return Ok(()); + } + + // Append (ensure trailing newline in existing content). + let mut content = existing; + if !content.is_empty() && !content.ends_with('\n') { + content.push('\n'); + } + content.push_str(&line); + content.push('\n'); + + std::fs::write(&ga_path, content)?; + Ok(()) +} + +/// Remove a pattern from `.gitattributes`. +pub fn untrack_pattern(repo_root: &Path, pattern: &str) -> Result<(), OxenError> { + let ga_path = repo_root.join(GITATTRIBUTES); + if !ga_path.exists() { + return Ok(()); + } + + let line = format_line(pattern); + let existing = std::fs::read_to_string(&ga_path)?; + let filtered: Vec<&str> = existing + .lines() + .filter(|l| l.trim() != line.trim()) + .collect(); + + let mut content = filtered.join("\n"); + if !content.is_empty() { + content.push('\n'); + } + + std::fs::write(&ga_path, content)?; + Ok(()) +} + +/// List all patterns currently tracked with the oxen filter. 
+pub fn list_tracked_patterns(repo_root: &Path) -> Result, OxenError> { + let ga_path = repo_root.join(GITATTRIBUTES); + if !ga_path.exists() { + return Ok(Vec::new()); + } + + let text = std::fs::read_to_string(&ga_path)?; + let patterns = text + .lines() + .filter(|l| l.contains(ATTR_SUFFIX)) + .filter_map(|l| { + let trimmed = l.trim(); + trimmed + .strip_suffix(ATTR_SUFFIX) + .map(|p| p.trim().to_string()) + }) + .collect(); + Ok(patterns) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_track_creates_gitattributes() { + let tmp = TempDir::new().unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + + let content = std::fs::read_to_string(tmp.path().join(GITATTRIBUTES)).unwrap(); + assert!(content.contains("*.bin filter=oxen diff=oxen merge=oxen -text")); + } + + #[test] + fn test_track_is_idempotent() { + let tmp = TempDir::new().unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + + let content = std::fs::read_to_string(tmp.path().join(GITATTRIBUTES)).unwrap(); + assert_eq!( + content.matches("*.bin").count(), + 1, + "pattern should appear only once" + ); + } + + #[test] + fn test_track_multiple_patterns() { + let tmp = TempDir::new().unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + track_pattern(tmp.path(), "datasets/**").unwrap(); + + let patterns = list_tracked_patterns(tmp.path()).unwrap(); + assert_eq!(patterns, vec!["*.bin", "datasets/**"]); + } + + #[test] + fn test_untrack_removes_pattern() { + let tmp = TempDir::new().unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + track_pattern(tmp.path(), "*.pt").unwrap(); + untrack_pattern(tmp.path(), "*.bin").unwrap(); + + let patterns = list_tracked_patterns(tmp.path()).unwrap(); + assert_eq!(patterns, vec!["*.pt"]); + } + + #[test] + fn test_untrack_noop_when_missing() { + let tmp = TempDir::new().unwrap(); + // No error when file doesn't exist. 
+ untrack_pattern(tmp.path(), "*.bin").unwrap(); + } + + #[test] + fn test_list_empty() { + let tmp = TempDir::new().unwrap(); + let patterns = list_tracked_patterns(tmp.path()).unwrap(); + assert!(patterns.is_empty()); + } +} diff --git a/oxen-rust/src/lib/src/lfs/hooks.rs b/oxen-rust/src/lib/src/lfs/hooks.rs new file mode 100644 index 000000000..9a58e5057 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/hooks.rs @@ -0,0 +1,173 @@ +use std::path::Path; + +use crate::error::OxenError; + +/// Marker comment used to identify our hook sections. +const HOOK_MARKER: &str = "# oxen lfs"; + +/// Install pre-push, post-checkout, and post-merge hooks into `.git/hooks/`. +/// +/// Idempotent: checks for existing `oxen lfs` content before appending. +/// Respects existing hook scripts by appending rather than overwriting. +pub fn install_hooks(git_dir: &Path) -> Result<(), OxenError> { + let hooks_dir = git_dir.join("hooks"); + std::fs::create_dir_all(&hooks_dir)?; + + install_hook( + &hooks_dir, + "pre-push", + &format!( + r#"{HOOK_MARKER} +command -v oxen >/dev/null 2>&1 || {{ echo >&2 "oxen not found in PATH, skipping LFS pre-push hook"; exit 0; }} +oxen lfs push "$@" +"# + ), + )?; + + install_hook( + &hooks_dir, + "post-checkout", + &format!( + r#"{HOOK_MARKER} +command -v oxen >/dev/null 2>&1 || exit 0 +oxen lfs pull --local +"# + ), + )?; + + install_hook( + &hooks_dir, + "post-merge", + &format!( + r#"{HOOK_MARKER} +command -v oxen >/dev/null 2>&1 || exit 0 +oxen lfs pull --local +"# + ), + )?; + + Ok(()) +} + +fn install_hook(hooks_dir: &Path, name: &str, snippet: &str) -> Result<(), OxenError> { + let hook_path = hooks_dir.join(name); + + let existing = if hook_path.exists() { + std::fs::read_to_string(&hook_path)? + } else { + String::new() + }; + + // Already installed? 
+ if existing.contains(HOOK_MARKER) { + return Ok(()); + } + + let mut content = if existing.is_empty() { + "#!/bin/sh\n".to_string() + } else { + let mut s = existing; + if !s.ends_with('\n') { + s.push('\n'); + } + s + }; + + content.push('\n'); + content.push_str(snippet); + + std::fs::write(&hook_path, &content)?; + + // Make executable on Unix. + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = std::fs::metadata(&hook_path)?.permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(&hook_path, perms)?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_install_hooks_creates_files() { + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + + install_hooks(&git_dir).unwrap(); + + let hooks_dir = git_dir.join("hooks"); + assert!(hooks_dir.join("pre-push").exists()); + assert!(hooks_dir.join("post-checkout").exists()); + assert!(hooks_dir.join("post-merge").exists()); + + // Check content. + let pre_push = std::fs::read_to_string(hooks_dir.join("pre-push")).unwrap(); + assert!(pre_push.contains("oxen lfs push")); + assert!(pre_push.starts_with("#!/bin/sh")); + } + + #[test] + fn test_install_hooks_idempotent() { + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + + install_hooks(&git_dir).unwrap(); + install_hooks(&git_dir).unwrap(); + + let pre_push = std::fs::read_to_string(git_dir.join("hooks/pre-push")).unwrap(); + assert_eq!( + pre_push.matches("oxen lfs push").count(), + 1, + "should not duplicate hook content" + ); + } + + #[test] + fn test_install_hooks_preserves_existing() { + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + let hooks_dir = git_dir.join("hooks"); + std::fs::create_dir_all(&hooks_dir).unwrap(); + + // Pre-existing hook script. 
+ std::fs::write( + hooks_dir.join("pre-push"), + "#!/bin/sh\necho 'existing hook'\n", + ) + .unwrap(); + + install_hooks(&git_dir).unwrap(); + + let content = std::fs::read_to_string(hooks_dir.join("pre-push")).unwrap(); + assert!( + content.contains("existing hook"), + "should preserve existing" + ); + assert!(content.contains("oxen lfs push"), "should add our hook"); + } + + #[cfg(unix)] + #[test] + fn test_hooks_are_executable() { + use std::os::unix::fs::PermissionsExt; + + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + + install_hooks(&git_dir).unwrap(); + + let meta = std::fs::metadata(git_dir.join("hooks/pre-push")).unwrap(); + let mode = meta.permissions().mode(); + assert!(mode & 0o111 != 0, "hook should be executable"); + } +} diff --git a/oxen-rust/src/lib/src/lfs/install.rs b/oxen-rust/src/lib/src/lfs/install.rs new file mode 100644 index 000000000..f601195dc --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/install.rs @@ -0,0 +1,63 @@ +use std::process::Command; + +use crate::error::OxenError; + +/// Configure Git's global filter driver so that every repository +/// using `filter=oxen` will invoke our clean/smudge process. +/// +/// Sets in `~/.gitconfig`: +/// ```text +/// [filter "oxen"] +/// process = oxen lfs filter-process +/// required = true +/// clean = oxen lfs clean -- %f +/// smudge = oxen lfs smudge -- %f +/// ``` +pub fn install_global_filter() -> Result<(), OxenError> { + git_config_global("filter.oxen.process", "oxen lfs filter-process")?; + git_config_global("filter.oxen.required", "true")?; + git_config_global("filter.oxen.clean", "oxen lfs clean -- %f")?; + git_config_global("filter.oxen.smudge", "oxen lfs smudge -- %f")?; + Ok(()) +} + +/// Remove the global filter driver configuration. +pub fn uninstall_global_filter() -> Result<(), OxenError> { + // --remove-section fails if the section doesn't exist, so ignore errors. 
+ let _ = Command::new("git") + .args(["config", "--global", "--remove-section", "filter.oxen"]) + .output(); + Ok(()) +} + +fn git_config_global(key: &str, value: &str) -> Result<(), OxenError> { + let output = Command::new("git") + .args(["config", "--global", key, value]) + .output() + .map_err(|e| OxenError::basic_str(format!("failed to run git config: {e}")))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(OxenError::basic_str(format!( + "git config --global {key} failed: {stderr}" + ))); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + // Note: We don't run install/uninstall in tests to avoid modifying the + // developer's actual ~/.gitconfig. Integration tests with isolated HOME + // can cover this. + + use super::*; + + #[test] + fn test_install_and_uninstall_do_not_panic() { + // Smoke test: just verify the functions can be called without panic. + // Actual git config changes are tested in integration tests. + let _ = install_global_filter(); + let _ = uninstall_global_filter(); + } +} diff --git a/oxen-rust/src/lib/src/lfs/pointer.rs b/oxen-rust/src/lib/src/lfs/pointer.rs new file mode 100644 index 000000000..63a7c0930 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/pointer.rs @@ -0,0 +1,161 @@ +use std::fmt; + +/// First line of every Oxen LFS pointer file. +pub const POINTER_VERSION_LINE: &str = "version https://oxen.ai/spec/v1"; + +/// Hash algorithm identifier used in pointer files. +pub const HASH_ALGO: &str = "xxh3"; + +/// Pointer files should never exceed this size in bytes. +pub const MAX_POINTER_SIZE: usize = 200; + +/// Represents an Oxen LFS pointer — a small stand-in stored in Git +/// that references content kept in the Oxen version store. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PointerFile { + /// 32-char lowercase hex hash (no algorithm prefix). + pub oid: String, + /// Size in bytes of the original content. 
+ pub size: u64, +} + +impl PointerFile { + pub fn new(hash: &str, size: u64) -> Self { + Self { + oid: hash.to_string(), + size, + } + } + + /// Serialize to the canonical pointer text (UTF-8, newline-terminated). + pub fn encode(&self) -> Vec { + self.to_string().into_bytes() + } + + /// Try to parse a byte slice as a pointer file. + /// Returns `None` when the data is not a valid pointer. + pub fn decode(data: &[u8]) -> Option { + if data.len() > MAX_POINTER_SIZE { + return None; + } + + let text = std::str::from_utf8(data).ok()?; + let mut lines = text.lines(); + + // Line 1: version + let version_line = lines.next()?; + if version_line != POINTER_VERSION_LINE { + return None; + } + + // Line 2: oid : + let oid_line = lines.next()?; + let oid_value = oid_line.strip_prefix("oid ")?; + let hash = oid_value.strip_prefix(&format!("{HASH_ALGO}:"))?; + + // Validate hex string (should be 32 chars for xxh3_128) + if hash.len() != 32 || !hash.chars().all(|c| c.is_ascii_hexdigit()) { + return None; + } + + // Line 3: size + let size_line = lines.next()?; + let size_value = size_line.strip_prefix("size ")?; + let size: u64 = size_value.parse().ok()?; + + // No extra lines allowed (other than a trailing newline which .lines() skips) + if lines.next().is_some() { + return None; + } + + Some(Self { + oid: hash.to_string(), + size, + }) + } + + /// Quick check: is this byte slice a valid Oxen LFS pointer? + pub fn is_pointer(data: &[u8]) -> bool { + if data.len() > MAX_POINTER_SIZE { + return false; + } + // Fast path: check the version prefix before full parse. 
+ data.starts_with(POINTER_VERSION_LINE.as_bytes()) && Self::decode(data).is_some() + } +} + +impl fmt::Display for PointerFile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}\noid {}:{}\nsize {}\n", + POINTER_VERSION_LINE, HASH_ALGO, self.oid, self.size, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pointer_roundtrip() { + let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 1234567890); + let encoded = ptr.encode(); + let decoded = PointerFile::decode(&encoded).expect("should decode"); + assert_eq!(ptr, decoded); + } + + #[test] + fn test_pointer_format() { + let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 42); + let text = String::from_utf8(ptr.encode()).unwrap(); + assert_eq!( + text, + "version https://oxen.ai/spec/v1\noid xxh3:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8\nsize 42\n" + ); + } + + #[test] + fn test_is_pointer_true() { + let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 100); + assert!(PointerFile::is_pointer(&ptr.encode())); + } + + #[test] + fn test_is_pointer_false_random_data() { + assert!(!PointerFile::is_pointer( + b"hello world, this is not a pointer" + )); + } + + #[test] + fn test_is_pointer_false_too_large() { + let big = vec![b'x'; MAX_POINTER_SIZE + 1]; + assert!(!PointerFile::is_pointer(&big)); + } + + #[test] + fn test_decode_rejects_bad_hash_length() { + let bad = b"version https://oxen.ai/spec/v1\noid xxh3:abc123\nsize 10\n"; + assert!(PointerFile::decode(bad).is_none()); + } + + #[test] + fn test_decode_rejects_extra_lines() { + let bad = b"version https://oxen.ai/spec/v1\noid xxh3:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8\nsize 10\nextra\n"; + assert!(PointerFile::decode(bad).is_none()); + } + + #[test] + fn test_decode_rejects_wrong_version() { + let bad = b"version https://git-lfs.github.com/spec/v1\noid xxh3:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8\nsize 10\n"; + assert!(PointerFile::decode(bad).is_none()); + } + + #[test] + fn 
test_decode_rejects_wrong_algorithm() { + let bad = b"version https://oxen.ai/spec/v1\noid sha256:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8\nsize 10\n"; + assert!(PointerFile::decode(bad).is_none()); + } +} diff --git a/oxen-rust/src/lib/src/lfs/status.rs b/oxen-rust/src/lib/src/lfs/status.rs new file mode 100644 index 000000000..6ab8f7abe --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/status.rs @@ -0,0 +1,121 @@ +use std::path::{Path, PathBuf}; + +use crate::error::OxenError; +use crate::lfs::gitattributes; +use crate::lfs::pointer::PointerFile; +use crate::storage::version_store::VersionStore; +use crate::storage::LocalVersionStore; + +/// Status information for a single LFS-tracked file. +#[derive(Debug)] +pub struct LfsFileStatus { + /// Path relative to repo root. + pub path: PathBuf, + /// The parsed pointer. + pub pointer: PointerFile, + /// Whether the actual content is available in the local version store. + pub local: bool, +} + +/// Walk the working tree, find pointer files that match tracked patterns, +/// and report their status. +pub async fn get_status( + repo_root: &Path, + versions_dir: &Path, +) -> Result, OxenError> { + let patterns = gitattributes::list_tracked_patterns(repo_root)?; + if patterns.is_empty() { + return Ok(Vec::new()); + } + + let store = LocalVersionStore::new(versions_dir); + let mut results = Vec::new(); + + // Build glob matchers. + let matchers: Vec = patterns + .iter() + .filter_map(|p| glob::Pattern::new(p).ok()) + .collect(); + + // Walk the working tree. + for entry in walkdir::WalkDir::new(repo_root) + .into_iter() + .filter_entry(|e| { + // Skip .git and .oxen directories. 
+ let name = e.file_name().to_string_lossy(); + name != ".git" && name != ".oxen" + }) + { + let entry = match entry { + Ok(e) => e, + Err(_) => continue, + }; + + if !entry.file_type().is_file() { + continue; + } + + let rel_path = match entry.path().strip_prefix(repo_root) { + Ok(p) => p, + Err(_) => continue, + }; + + let rel_str = rel_path.to_string_lossy(); + + // Check if the file matches any tracked pattern. + let matched = matchers.iter().any(|m| m.matches(&rel_str)); + if !matched { + continue; + } + + // Read the file and check if it's a pointer. + let data = match std::fs::read(entry.path()) { + Ok(d) => d, + Err(_) => continue, + }; + + if let Some(pointer) = PointerFile::decode(&data) { + let local = store.version_exists(&pointer.oid).await.unwrap_or(false); + results.push(LfsFileStatus { + path: rel_path.to_path_buf(), + pointer, + local, + }); + } + } + + Ok(results) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lfs::filter; + use crate::lfs::gitattributes; + use tempfile::TempDir; + + #[tokio::test] + async fn test_status_finds_pointer_files() { + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let oxen_dir = repo_root.join(".oxen"); + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).unwrap(); + + // Track *.bin + gitattributes::track_pattern(repo_root, "*.bin").unwrap(); + + // Create a pointer file by running clean. + let content = b"binary content here"; + let pointer_bytes = filter::clean(&versions_dir, content).await.unwrap(); + std::fs::write(repo_root.join("model.bin"), &pointer_bytes).unwrap(); + + // Create a non-matching file. 
+ std::fs::write(repo_root.join("readme.txt"), b"hello").unwrap(); + + let statuses = get_status(repo_root, &versions_dir).await.unwrap(); + assert_eq!(statuses.len(), 1); + assert_eq!(statuses[0].path, PathBuf::from("model.bin")); + assert!(statuses[0].local); + } +} diff --git a/oxen-rust/src/lib/src/lfs/sync.rs b/oxen-rust/src/lib/src/lfs/sync.rs new file mode 100644 index 000000000..9d45304a8 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/sync.rs @@ -0,0 +1,89 @@ +use std::path::Path; + +use crate::error::OxenError; +use crate::lfs::pointer::PointerFile; +use crate::lfs::status; +use crate::storage::version_store::VersionStore; +use crate::storage::LocalVersionStore; + +/// Push large file versions to the configured Oxen remote. +/// +/// Called by the pre-push hook. `_args` receives the hook arguments +/// (remote name and URL) passed by Git. +pub async fn push_to_remote( + repo_root: &Path, + oxen_dir: &Path, + _args: &[String], +) -> Result<(), OxenError> { + let versions_dir = oxen_dir.join("versions"); + let statuses = status::get_status(repo_root, &versions_dir).await?; + + let to_push: Vec<_> = statuses.iter().filter(|s| s.local).collect(); + if to_push.is_empty() { + log::info!("oxen lfs push: nothing to push"); + return Ok(()); + } + + // TODO (Phase 3): Upload missing versions to the Oxen remote + // using the api::client infrastructure. + log::info!( + "oxen lfs push: {} files would be pushed (remote sync not yet implemented)", + to_push.len() + ); + + Ok(()) +} + +/// Pull large file content and restore pointer files in the working tree. +/// +/// When `local_only` is true, only restores from the local `.oxen/versions/` +/// store (no network). This is used by post-checkout and post-merge hooks. 
+pub async fn pull_from_remote( + repo_root: &Path, + oxen_dir: &Path, + local_only: bool, +) -> Result<(), OxenError> { + let versions_dir = oxen_dir.join("versions"); + let statuses = status::get_status(repo_root, &versions_dir).await?; + + let store = LocalVersionStore::new(&versions_dir); + let mut restored = 0u64; + + for file_status in &statuses { + if file_status.local { + // Content is available locally — restore the actual file. + let dest = repo_root.join(&file_status.path); + store + .copy_version_to_path(&file_status.pointer.oid, &dest) + .await?; + restored += 1; + } else if !local_only { + // TODO (Phase 3): Fetch from remote, then restore. + log::warn!( + "oxen lfs pull: {} not available locally and remote fetch not yet implemented", + file_status.path.display() + ); + } + } + + if restored > 0 { + log::info!("oxen lfs pull: restored {restored} files"); + } + + Ok(()) +} + +/// Scan working tree for pointer files and return the list of OIDs +/// that need to be pushed. +pub async fn list_pushable_oids( + repo_root: &Path, + oxen_dir: &Path, +) -> Result, OxenError> { + let versions_dir = oxen_dir.join("versions"); + let statuses = status::get_status(repo_root, &versions_dir).await?; + Ok(statuses + .into_iter() + .filter(|s| s.local) + .map(|s| s.pointer) + .collect()) +} diff --git a/oxen-rust/src/lib/src/lib.rs b/oxen-rust/src/lib/src/lib.rs index 4e2a1aff8..3d7392b07 100644 --- a/oxen-rust/src/lib/src/lib.rs +++ b/oxen-rust/src/lib/src/lib.rs @@ -70,6 +70,7 @@ pub mod constants; pub mod core; pub mod error; pub mod io; +pub mod lfs; pub mod migrations; pub mod model; pub mod namespaces; From d71d66848ad43732b455599404b723e0baa42052 Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Fri, 27 Feb 2026 10:59:57 -0800 Subject: [PATCH 2/9] fixes --- oxen-rust/src/lib/src/lfs/filter_process.rs | 64 +++++++++++++++------ 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/oxen-rust/src/lib/src/lfs/filter_process.rs 
b/oxen-rust/src/lib/src/lfs/filter_process.rs index cf811ba3c..1edc69c90 100644 --- a/oxen-rust/src/lib/src/lfs/filter_process.rs +++ b/oxen-rust/src/lib/src/lfs/filter_process.rs @@ -77,16 +77,26 @@ pub mod pkt_line { Ok(result) } - /// Read text key=value pairs until flush. - pub fn read_text_pairs_until_flush(r: &mut impl BufRead) -> io::Result> { - let mut pairs = Vec::new(); + /// Read all text lines until flush. Returns each line trimmed of trailing newline. + pub fn read_lines_until_flush(r: &mut impl BufRead) -> io::Result> { + let mut lines = Vec::new(); while let Some(pkt) = read_packet(r)? { let text = String::from_utf8_lossy(&pkt); - let text = text.trim_end_matches('\n'); - if let Some((key, value)) = text.split_once('=') { - pairs.push((key.to_string(), value.to_string())); - } + lines.push(text.trim_end_matches('\n').to_string()); } + Ok(lines) + } + + /// Read text key=value pairs until flush (lines without `=` are skipped). + pub fn read_text_pairs_until_flush(r: &mut impl BufRead) -> io::Result> { + let lines = read_lines_until_flush(r)?; + let pairs = lines + .into_iter() + .filter_map(|line| { + line.split_once('=') + .map(|(k, v)| (k.to_string(), v.to_string())) + }) + .collect(); Ok(pairs) } } @@ -104,27 +114,28 @@ pub fn run_filter_process(versions_dir: &Path) -> Result<(), OxenError> { let lfs_config = LfsConfig::load(versions_dir.parent().unwrap_or(Path::new(".")))?; // --- Handshake --- - // Read welcome message. - let welcome = pkt_line::read_until_flush(&mut reader)?; - let welcome_str = String::from_utf8_lossy(&welcome); - if !welcome_str.contains("git-filter-client") { - return Err(OxenError::basic_str("expected git-filter-client handshake")); + // Phase 1: Git sends welcome + version(s) in one flush group. 
+ // packet: git-filter-client\n + // packet: version=2\n + // packet: 0000 + let welcome_lines = pkt_line::read_lines_until_flush(&mut reader)?; + + if !welcome_lines.iter().any(|l| l == "git-filter-client") { + return Err(OxenError::basic_str( + "expected git-filter-client in handshake", + )); } - // Read version. - let _version = pkt_line::read_until_flush(&mut reader)?; - - // Send our welcome + version. + // Respond with welcome + chosen version in one flush group. pkt_line::write_text(&mut writer, "git-filter-server")?; - pkt_line::write_flush(&mut writer)?; pkt_line::write_text(&mut writer, "version=2")?; pkt_line::write_flush(&mut writer)?; writer.flush()?; - // Read capabilities. + // Phase 2: Git sends capabilities in one flush group. let _caps = pkt_line::read_text_pairs_until_flush(&mut reader)?; - // Advertise our capabilities. + // Respond with the capabilities we support. pkt_line::write_text(&mut writer, "capability=clean")?; pkt_line::write_text(&mut writer, "capability=smudge")?; pkt_line::write_flush(&mut writer)?; @@ -221,4 +232,19 @@ mod tests { assert_eq!(pairs[0], ("command".to_string(), "clean".to_string())); assert_eq!(pairs[1], ("pathname".to_string(), "test.bin".to_string())); } + + #[test] + fn test_read_lines_includes_non_pairs() { + // Git sends "git-filter-client" (no =) plus "version=2" in one group. 
+ let mut buf = Vec::new(); + write_text(&mut buf, "git-filter-client").unwrap(); + write_text(&mut buf, "version=2").unwrap(); + write_flush(&mut buf).unwrap(); + + let mut reader = Cursor::new(buf); + let lines = read_lines_until_flush(&mut reader).unwrap(); + assert_eq!(lines.len(), 2); + assert_eq!(lines[0], "git-filter-client"); + assert_eq!(lines[1], "version=2"); + } } From f52cb5938aed8989df73d7cfd54945cc5df7c995 Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Fri, 27 Feb 2026 11:07:01 -0800 Subject: [PATCH 3/9] use full path to `oxen` binary --- oxen-rust/src/cli/src/cmd/lfs/init.rs | 6 +- oxen-rust/src/cli/src/cmd/lfs/install.rs | 6 +- oxen-rust/src/lib/src/lfs/hooks.rs | 75 +++++++++++++++++++----- oxen-rust/src/lib/src/lfs/install.rs | 68 ++++++++++++++++++--- 4 files changed, 127 insertions(+), 28 deletions(-) diff --git a/oxen-rust/src/cli/src/cmd/lfs/init.rs b/oxen-rust/src/cli/src/cmd/lfs/init.rs index 36b07fed1..62cc387ab 100644 --- a/oxen-rust/src/cli/src/cmd/lfs/init.rs +++ b/oxen-rust/src/cli/src/cmd/lfs/init.rs @@ -52,8 +52,10 @@ impl RunCmd for LfsInitCmd { let config = lfs::config::LfsConfig { remote_url }; config.save(&oxen_dir)?; - // Install hooks. - lfs::hooks::install_hooks(&git_dir)?; + // Install hooks using the full path to the current oxen binary. + let oxen_bin = lfs::install::current_exe_path()?; + let oxen_path = std::path::Path::new(&oxen_bin); + lfs::hooks::install_hooks(&git_dir, oxen_path)?; // Add .oxen/ to .gitignore. 
ensure_gitignore(&repo_root)?; diff --git a/oxen-rust/src/cli/src/cmd/lfs/install.rs b/oxen-rust/src/cli/src/cmd/lfs/install.rs index 2c9c64d21..1862fe8b8 100644 --- a/oxen-rust/src/cli/src/cmd/lfs/install.rs +++ b/oxen-rust/src/cli/src/cmd/lfs/install.rs @@ -31,8 +31,10 @@ impl RunCmd for LfsInstallCmd { lfs::install::uninstall_global_filter()?; println!("Oxen LFS global filter uninstalled."); } else { - lfs::install::install_global_filter()?; - println!("Oxen LFS global filter installed."); + let oxen_bin = lfs::install::current_exe_path()?; + let oxen_path = std::path::Path::new(&oxen_bin); + lfs::install::install_global_filter(oxen_path)?; + println!("Oxen LFS global filter installed (using {oxen_bin})."); } Ok(()) } diff --git a/oxen-rust/src/lib/src/lfs/hooks.rs b/oxen-rust/src/lib/src/lfs/hooks.rs index 9a58e5057..74070d994 100644 --- a/oxen-rust/src/lib/src/lfs/hooks.rs +++ b/oxen-rust/src/lib/src/lfs/hooks.rs @@ -7,19 +7,27 @@ const HOOK_MARKER: &str = "# oxen lfs"; /// Install pre-push, post-checkout, and post-merge hooks into `.git/hooks/`. /// +/// `oxen_bin` is the absolute path to the `oxen` executable that the hooks +/// will invoke. This avoids depending on `oxen` being on PATH. +/// /// Idempotent: checks for existing `oxen lfs` content before appending. /// Respects existing hook scripts by appending rather than overwriting. -pub fn install_hooks(git_dir: &Path) -> Result<(), OxenError> { +pub fn install_hooks(git_dir: &Path, oxen_bin: &Path) -> Result<(), OxenError> { let hooks_dir = git_dir.join("hooks"); std::fs::create_dir_all(&hooks_dir)?; + let bin = shell_quote(oxen_bin); + install_hook( &hooks_dir, "pre-push", &format!( r#"{HOOK_MARKER} -command -v oxen >/dev/null 2>&1 || {{ echo >&2 "oxen not found in PATH, skipping LFS pre-push hook"; exit 0; }} -oxen lfs push "$@" +if [ ! 
-x "{bin}" ]; then + echo >&2 "oxen not found at {bin}, skipping LFS pre-push hook" + exit 0 +fi +{bin} lfs push "$@" "# ), )?; @@ -29,8 +37,8 @@ oxen lfs push "$@" "post-checkout", &format!( r#"{HOOK_MARKER} -command -v oxen >/dev/null 2>&1 || exit 0 -oxen lfs pull --local +[ -x "{bin}" ] || exit 0 +{bin} lfs pull --local "# ), )?; @@ -40,8 +48,8 @@ oxen lfs pull --local "post-merge", &format!( r#"{HOOK_MARKER} -command -v oxen >/dev/null 2>&1 || exit 0 -oxen lfs pull --local +[ -x "{bin}" ] || exit 0 +{bin} lfs pull --local "# ), )?; @@ -49,6 +57,16 @@ oxen lfs pull --local Ok(()) } +/// Shell-quote a path if it contains spaces, otherwise return as-is. +fn shell_quote(path: &Path) -> String { + let s = path.to_string_lossy(); + if s.contains(' ') { + format!("'{s}'") + } else { + s.into_owned() + } +} + fn install_hook(hooks_dir: &Path, name: &str, snippet: &str) -> Result<(), OxenError> { let hook_path = hooks_dir.join(name); @@ -93,24 +111,29 @@ fn install_hook(hooks_dir: &Path, name: &str, snippet: &str) -> Result<(), OxenE #[cfg(test)] mod tests { use super::*; + use std::path::PathBuf; use tempfile::TempDir; + fn test_bin() -> PathBuf { + PathBuf::from("/usr/local/bin/oxen") + } + #[test] fn test_install_hooks_creates_files() { let tmp = TempDir::new().unwrap(); let git_dir = tmp.path().join(".git"); std::fs::create_dir_all(&git_dir).unwrap(); - install_hooks(&git_dir).unwrap(); + install_hooks(&git_dir, &test_bin()).unwrap(); let hooks_dir = git_dir.join("hooks"); assert!(hooks_dir.join("pre-push").exists()); assert!(hooks_dir.join("post-checkout").exists()); assert!(hooks_dir.join("post-merge").exists()); - // Check content. + // Check content uses full path. 
let pre_push = std::fs::read_to_string(hooks_dir.join("pre-push")).unwrap(); - assert!(pre_push.contains("oxen lfs push")); + assert!(pre_push.contains("/usr/local/bin/oxen lfs push")); assert!(pre_push.starts_with("#!/bin/sh")); } @@ -120,12 +143,12 @@ mod tests { let git_dir = tmp.path().join(".git"); std::fs::create_dir_all(&git_dir).unwrap(); - install_hooks(&git_dir).unwrap(); - install_hooks(&git_dir).unwrap(); + install_hooks(&git_dir, &test_bin()).unwrap(); + install_hooks(&git_dir, &test_bin()).unwrap(); let pre_push = std::fs::read_to_string(git_dir.join("hooks/pre-push")).unwrap(); assert_eq!( - pre_push.matches("oxen lfs push").count(), + pre_push.matches("lfs push").count(), 1, "should not duplicate hook content" ); @@ -145,14 +168,34 @@ mod tests { ) .unwrap(); - install_hooks(&git_dir).unwrap(); + install_hooks(&git_dir, &test_bin()).unwrap(); let content = std::fs::read_to_string(hooks_dir.join("pre-push")).unwrap(); assert!( content.contains("existing hook"), "should preserve existing" ); - assert!(content.contains("oxen lfs push"), "should add our hook"); + assert!( + content.contains("/usr/local/bin/oxen lfs push"), + "should add our hook with full path" + ); + } + + #[test] + fn test_install_hooks_with_spaces_in_path() { + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + + let bin = PathBuf::from("/path with spaces/oxen"); + install_hooks(&git_dir, &bin).unwrap(); + + let pre_push = + std::fs::read_to_string(git_dir.join("hooks/pre-push")).unwrap(); + assert!( + pre_push.contains("'/path with spaces/oxen' lfs push"), + "should quote path with spaces" + ); } #[cfg(unix)] @@ -164,7 +207,7 @@ mod tests { let git_dir = tmp.path().join(".git"); std::fs::create_dir_all(&git_dir).unwrap(); - install_hooks(&git_dir).unwrap(); + install_hooks(&git_dir, &test_bin()).unwrap(); let meta = std::fs::metadata(git_dir.join("hooks/pre-push")).unwrap(); let mode = 
meta.permissions().mode(); diff --git a/oxen-rust/src/lib/src/lfs/install.rs b/oxen-rust/src/lib/src/lfs/install.rs index f601195dc..b943b9bf4 100644 --- a/oxen-rust/src/lib/src/lfs/install.rs +++ b/oxen-rust/src/lib/src/lfs/install.rs @@ -1,26 +1,58 @@ +use std::path::Path; use std::process::Command; use crate::error::OxenError; +/// Resolve the canonical absolute path of the running `oxen` binary. +pub fn current_exe_path() -> Result { + let exe = std::env::current_exe() + .map_err(|e| OxenError::basic_str(format!("failed to determine current executable: {e}")))?; + let canonical = exe.canonicalize().map_err(|e| { + OxenError::basic_str(format!( + "failed to canonicalize executable path {}: {e}", + exe.display() + )) + })?; + canonical.to_str().map(|s| s.to_string()).ok_or_else(|| { + OxenError::basic_str(format!( + "executable path is not valid UTF-8: {}", + canonical.display() + )) + }) +} + /// Configure Git's global filter driver so that every repository /// using `filter=oxen` will invoke our clean/smudge process. /// +/// `oxen_bin` is the absolute path to the `oxen` executable. 
+/// /// Sets in `~/.gitconfig`: /// ```text /// [filter "oxen"] -/// process = oxen lfs filter-process +/// process = /full/path/to/oxen lfs filter-process /// required = true -/// clean = oxen lfs clean -- %f -/// smudge = oxen lfs smudge -- %f +/// clean = /full/path/to/oxen lfs clean -- %f +/// smudge = /full/path/to/oxen lfs smudge -- %f /// ``` -pub fn install_global_filter() -> Result<(), OxenError> { - git_config_global("filter.oxen.process", "oxen lfs filter-process")?; +pub fn install_global_filter(oxen_bin: &Path) -> Result<(), OxenError> { + let bin = shell_quote(oxen_bin); + git_config_global("filter.oxen.process", &format!("{bin} lfs filter-process"))?; git_config_global("filter.oxen.required", "true")?; - git_config_global("filter.oxen.clean", "oxen lfs clean -- %f")?; - git_config_global("filter.oxen.smudge", "oxen lfs smudge -- %f")?; + git_config_global("filter.oxen.clean", &format!("{bin} lfs clean -- %f"))?; + git_config_global("filter.oxen.smudge", &format!("{bin} lfs smudge -- %f"))?; Ok(()) } +/// Shell-quote a path if it contains spaces, otherwise return as-is. +fn shell_quote(path: &Path) -> String { + let s = path.to_string_lossy(); + if s.contains(' ') { + format!("'{s}'") + } else { + s.into_owned() + } +} + /// Remove the global filter driver configuration. pub fn uninstall_global_filter() -> Result<(), OxenError> { // --remove-section fails if the section doesn't exist, so ignore errors. @@ -57,7 +89,27 @@ mod tests { fn test_install_and_uninstall_do_not_panic() { // Smoke test: just verify the functions can be called without panic. // Actual git config changes are tested in integration tests. - let _ = install_global_filter(); + let exe = std::path::PathBuf::from("/usr/local/bin/oxen"); + let _ = install_global_filter(&exe); let _ = uninstall_global_filter(); } + + #[test] + fn test_current_exe_path_returns_string() { + // Should succeed in any test environment. 
+ let path = current_exe_path().unwrap(); + assert!(!path.is_empty()); + } + + #[test] + fn test_shell_quote_no_spaces() { + let p = std::path::Path::new("/usr/local/bin/oxen"); + assert_eq!(shell_quote(p), "/usr/local/bin/oxen"); + } + + #[test] + fn test_shell_quote_with_spaces() { + let p = std::path::Path::new("/path with spaces/oxen"); + assert_eq!(shell_quote(p), "'/path with spaces/oxen'"); + } } From 19c82890879694be13df0fc090a7623bf97c79aa Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Fri, 27 Feb 2026 11:27:37 -0800 Subject: [PATCH 4/9] fix tokio error --- oxen-rust/src/lib/src/lfs/filter_process.rs | 17 ++++++++++------- oxen-rust/src/lib/src/lfs/hooks.rs | 3 +-- oxen-rust/src/lib/src/lfs/install.rs | 5 +++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/oxen-rust/src/lib/src/lfs/filter_process.rs b/oxen-rust/src/lib/src/lfs/filter_process.rs index 1edc69c90..92c31d36f 100644 --- a/oxen-rust/src/lib/src/lfs/filter_process.rs +++ b/oxen-rust/src/lib/src/lfs/filter_process.rs @@ -141,11 +141,10 @@ pub fn run_filter_process(versions_dir: &Path) -> Result<(), OxenError> { pkt_line::write_flush(&mut writer)?; writer.flush()?; - // Build a tokio runtime for async version-store operations. - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .map_err(|e| OxenError::basic_str(format!("failed to build tokio runtime: {e}")))?; + // Get a handle to the current tokio runtime. The CLI's main() already + // starts one, so we must not create a second. We use block_in_place + + // block_on to run async version-store ops from this synchronous context. 
+ let handle = tokio::runtime::Handle::current(); // --- Per-file loop --- loop { @@ -167,8 +166,12 @@ pub fn run_filter_process(versions_dir: &Path) -> Result<(), OxenError> { let content = pkt_line::read_until_flush(&mut reader)?; let result = match command { - "clean" => rt.block_on(filter::clean(versions_dir, &content))?, - "smudge" => rt.block_on(filter::smudge(versions_dir, &lfs_config, &content))?, + "clean" => tokio::task::block_in_place(|| { + handle.block_on(filter::clean(versions_dir, &content)) + })?, + "smudge" => tokio::task::block_in_place(|| { + handle.block_on(filter::smudge(versions_dir, &lfs_config, &content)) + })?, other => { log::warn!("oxen lfs filter-process: unknown command '{other}', passing through"); content diff --git a/oxen-rust/src/lib/src/lfs/hooks.rs b/oxen-rust/src/lib/src/lfs/hooks.rs index 74070d994..f2de06b13 100644 --- a/oxen-rust/src/lib/src/lfs/hooks.rs +++ b/oxen-rust/src/lib/src/lfs/hooks.rs @@ -190,8 +190,7 @@ mod tests { let bin = PathBuf::from("/path with spaces/oxen"); install_hooks(&git_dir, &bin).unwrap(); - let pre_push = - std::fs::read_to_string(git_dir.join("hooks/pre-push")).unwrap(); + let pre_push = std::fs::read_to_string(git_dir.join("hooks/pre-push")).unwrap(); assert!( pre_push.contains("'/path with spaces/oxen' lfs push"), "should quote path with spaces" diff --git a/oxen-rust/src/lib/src/lfs/install.rs b/oxen-rust/src/lib/src/lfs/install.rs index b943b9bf4..5e00a1bef 100644 --- a/oxen-rust/src/lib/src/lfs/install.rs +++ b/oxen-rust/src/lib/src/lfs/install.rs @@ -5,8 +5,9 @@ use crate::error::OxenError; /// Resolve the canonical absolute path of the running `oxen` binary. 
pub fn current_exe_path() -> Result { - let exe = std::env::current_exe() - .map_err(|e| OxenError::basic_str(format!("failed to determine current executable: {e}")))?; + let exe = std::env::current_exe().map_err(|e| { + OxenError::basic_str(format!("failed to determine current executable: {e}")) + })?; let canonical = exe.canonicalize().map_err(|e| { OxenError::basic_str(format!( "failed to canonicalize executable path {}: {e}", From e2492f4fe57d266a7b7383472c2ab0a81340c2ba Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Fri, 27 Feb 2026 11:32:30 -0800 Subject: [PATCH 5/9] fix clone of oxen-lfs enabled repo + explicit sync command --- oxen-rust/src/cli/src/cmd/lfs.rs | 4 + oxen-rust/src/cli/src/cmd/lfs/fetch_all.rs | 37 ++++++ oxen-rust/src/cli/src/cmd/lfs/init.rs | 10 ++ oxen-rust/src/cli/src/cmd/lfs/smudge.rs | 2 +- oxen-rust/src/lib/src/lfs/filter.rs | 130 ++++++++++++++++++-- oxen-rust/src/lib/src/lfs/filter_process.rs | 16 ++- oxen-rust/src/lib/src/lfs/sync.rs | 101 +++++++++++++-- 7 files changed, 279 insertions(+), 21 deletions(-) create mode 100644 oxen-rust/src/cli/src/cmd/lfs/fetch_all.rs diff --git a/oxen-rust/src/cli/src/cmd/lfs.rs b/oxen-rust/src/cli/src/cmd/lfs.rs index 712af7175..c89ba289b 100644 --- a/oxen-rust/src/cli/src/cmd/lfs.rs +++ b/oxen-rust/src/cli/src/cmd/lfs.rs @@ -4,6 +4,9 @@ pub use clean::LfsCleanCmd; pub mod env; pub use env::LfsEnvCmd; +pub mod fetch_all; +pub use fetch_all::LfsFetchAllCmd; + pub mod filter_process; pub use filter_process::LfsFilterProcessCmd; @@ -84,6 +87,7 @@ impl LfsCmd { let commands: Vec> = vec![ Box::new(LfsCleanCmd), Box::new(LfsEnvCmd), + Box::new(LfsFetchAllCmd), Box::new(LfsFilterProcessCmd), Box::new(LfsInitCmd), Box::new(LfsInstallCmd), diff --git a/oxen-rust/src/cli/src/cmd/lfs/fetch_all.rs b/oxen-rust/src/cli/src/cmd/lfs/fetch_all.rs new file mode 100644 index 000000000..cbe563e91 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/fetch_all.rs @@ -0,0 +1,37 @@ +use async_trait::async_trait; +use
clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "fetch-all"; +pub struct LfsFetchAllCmd; + +#[async_trait] +impl RunCmd for LfsFetchAllCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about( + "Resolve and restore ALL tracked pointer files. Errors if any file cannot be resolved.", + ) + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + + if !oxen_dir.exists() { + return Err(OxenError::basic_str( + "Not an oxen lfs repository. Run `oxen lfs init` first.", + )); + } + + lfs::sync::fetch_all(&repo_root, &oxen_dir).await + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/init.rs b/oxen-rust/src/cli/src/cmd/lfs/init.rs index 62cc387ab..54c17c66d 100644 --- a/oxen-rust/src/cli/src/cmd/lfs/init.rs +++ b/oxen-rust/src/cli/src/cmd/lfs/init.rs @@ -61,6 +61,16 @@ impl RunCmd for LfsInitCmd { ensure_gitignore(&repo_root)?; println!("Oxen LFS initialized in {}", repo_root.display()); + + // If .gitattributes already has tracked patterns (e.g. after clone), + // hint that the user should pull to restore large files. + let patterns = lfs::gitattributes::list_tracked_patterns(&repo_root)?; + if !patterns.is_empty() { + println!( + "Tracked patterns found — run `oxen lfs pull` or `oxen lfs fetch-all` to restore large files." 
+ ); + } + Ok(()) } } diff --git a/oxen-rust/src/cli/src/cmd/lfs/smudge.rs b/oxen-rust/src/cli/src/cmd/lfs/smudge.rs index e1c007c67..962d17b0f 100644 --- a/oxen-rust/src/cli/src/cmd/lfs/smudge.rs +++ b/oxen-rust/src/cli/src/cmd/lfs/smudge.rs @@ -41,7 +41,7 @@ impl RunCmd for LfsSmudgeCmd { buf }; - let result = lfs::filter::smudge(&versions_dir, &config, &pointer_data).await?; + let result = lfs::filter::smudge(&versions_dir, &repo_root, &config, &pointer_data).await?; // Write result to stdout. { diff --git a/oxen-rust/src/lib/src/lfs/filter.rs b/oxen-rust/src/lib/src/lfs/filter.rs index 06b3fa50b..a175089da 100644 --- a/oxen-rust/src/lib/src/lfs/filter.rs +++ b/oxen-rust/src/lib/src/lfs/filter.rs @@ -1,4 +1,5 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; +use std::process::Command; use crate::error::OxenError; use crate::lfs::config::LfsConfig; @@ -28,10 +29,13 @@ pub async fn clean(versions_dir: &Path, content: &[u8]) -> Result, OxenE /// Smudge filter: parse pointer, look up content in version store, return content. /// -/// Strategy: local store first, then remote fetch (if configured), then fall back -/// to returning the pointer bytes unchanged with a warning. +/// Strategy: +/// 1. Local `.oxen/versions/` store. +/// 2. Origin's `.oxen/versions/` (for local clones — discovered via `git config remote.origin.url`). +/// 3. Fallback: return pointer bytes unchanged with a warning. pub async fn smudge( versions_dir: &Path, + repo_root: &Path, _lfs_config: &LfsConfig, pointer_data: &[u8], ) -> Result, OxenError> { @@ -41,6 +45,9 @@ pub async fn smudge( None => return Ok(pointer_data.to_vec()), }; + // Ensure versions dir exists (may be missing on a fresh clone). + std::fs::create_dir_all(versions_dir).ok(); + let store = LocalVersionStore::new(versions_dir); // 1. Try local store. @@ -48,9 +55,21 @@ pub async fn smudge( return store.get_version(&pointer.oid).await; } - // 2. TODO (Phase 3): fetch from Oxen remote with timeout. + // 2. 
Try origin's version store (local clones only). + if let Some(origin_versions) = origin_versions_dir(repo_root) { + let origin_store = LocalVersionStore::new(&origin_versions); + if origin_store.version_exists(&pointer.oid).await? { + // Copy into our local store for future use. + let data = origin_store.get_version(&pointer.oid).await?; + store.init().await?; + store.store_version(&pointer.oid, &data).await?; + return Ok(data); + } + } + + // 3. TODO (Phase 3): fetch from Oxen remote with timeout. - // 3. Fallback — return pointer bytes and warn. + // 4. Fallback — return pointer bytes and warn. log::warn!( "oxen lfs smudge: content for {} not available locally; run `oxen lfs pull`", pointer.oid, @@ -58,6 +77,69 @@ pub async fn smudge( Ok(pointer_data.to_vec()) } +/// Discover the origin's `.oxen/versions/` directory for local clones. +/// +/// Returns `None` if the origin is a remote URL or doesn't have an `.oxen/versions/` dir. +fn origin_versions_dir(repo_root: &Path) -> Option { + let url = get_origin_url(repo_root)?; + let origin_path = as_local_path(&url)?; + let versions = origin_path.join(".oxen").join("versions"); + if versions.is_dir() { + Some(versions) + } else { + None + } +} + +/// Run `git config remote.origin.url` in the given repo directory. +fn get_origin_url(repo_root: &Path) -> Option { + let output = Command::new("git") + .args(["config", "remote.origin.url"]) + .current_dir(repo_root) + .output() + .ok()?; + + if !output.status.success() { + return None; + } + + let url = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if url.is_empty() { + None + } else { + Some(url) + } +} + +/// Convert a Git remote URL to a local filesystem path, if it is one. +/// +/// Handles: +/// - Absolute paths: `/foo/bar` +/// - `file://` URLs: `file:///foo/bar` +/// +/// Returns `None` for remote URLs (ssh://, https://, git@, etc.). 
+fn as_local_path(url: &str) -> Option { + if let Some(stripped) = url.strip_prefix("file://") { + let path = PathBuf::from(stripped); + if path.is_dir() { + return Some(path); + } + return None; + } + + // Reject obvious remote URLs. + if url.contains("://") || url.contains('@') { + return None; + } + + let path = PathBuf::from(url); + if path.is_absolute() && path.is_dir() { + Some(path) + } else { + None + } +} + #[cfg(test)] mod tests { use super::*; @@ -98,13 +180,14 @@ mod tests { #[tokio::test] async fn test_smudge_restores_content() { let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); let versions_dir = tmp.path().join("versions"); let config = LfsConfig::default(); let content = b"restore me"; let pointer_bytes = clean(&versions_dir, content).await.unwrap(); - let restored = smudge(&versions_dir, &config, &pointer_bytes) + let restored = smudge(&versions_dir, repo_root, &config, &pointer_bytes) .await .unwrap(); assert_eq!(restored, content); @@ -113,17 +196,21 @@ mod tests { #[tokio::test] async fn test_smudge_passthrough_non_pointer() { let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); let versions_dir = tmp.path().join("versions"); let config = LfsConfig::default(); let data = b"not a pointer"; - let result = smudge(&versions_dir, &config, data).await.unwrap(); + let result = smudge(&versions_dir, repo_root, &config, data) + .await + .unwrap(); assert_eq!(result, data); } #[tokio::test] async fn test_smudge_fallback_when_missing() { let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); let versions_dir = tmp.path().join("versions"); let config = LfsConfig::default(); @@ -131,13 +218,34 @@ mod tests { let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 999); let pointer_bytes = ptr.encode(); - // Create the versions dir so version_exists doesn't fail. 
- std::fs::create_dir_all(&versions_dir).unwrap(); - - let result = smudge(&versions_dir, &config, &pointer_bytes) + let result = smudge(&versions_dir, repo_root, &config, &pointer_bytes) .await .unwrap(); // Falls back to returning the pointer bytes. assert_eq!(result, pointer_bytes); } + + #[test] + fn test_as_local_path_rejects_ssh() { + assert!(as_local_path("git@github.com:user/repo.git").is_none()); + } + + #[test] + fn test_as_local_path_rejects_https() { + assert!(as_local_path("https://github.com/user/repo.git").is_none()); + } + + #[test] + fn test_as_local_path_accepts_file_url() { + let tmp = TempDir::new().unwrap(); + let url = format!("file://{}", tmp.path().display()); + assert_eq!(as_local_path(&url), Some(tmp.path().to_path_buf())); + } + + #[test] + fn test_as_local_path_accepts_absolute_path() { + let tmp = TempDir::new().unwrap(); + let path_str = tmp.path().to_string_lossy().to_string(); + assert_eq!(as_local_path(&path_str), Some(tmp.path().to_path_buf())); + } } diff --git a/oxen-rust/src/lib/src/lfs/filter_process.rs b/oxen-rust/src/lib/src/lfs/filter_process.rs index 92c31d36f..abb002031 100644 --- a/oxen-rust/src/lib/src/lfs/filter_process.rs +++ b/oxen-rust/src/lib/src/lfs/filter_process.rs @@ -111,7 +111,14 @@ pub fn run_filter_process(versions_dir: &Path) -> Result<(), OxenError> { let mut reader = BufReader::new(stdin.lock()); let mut writer = BufWriter::new(stdout.lock()); - let lfs_config = LfsConfig::load(versions_dir.parent().unwrap_or(Path::new(".")))?; + // Ensure versions dir exists (may be missing on a fresh clone). + std::fs::create_dir_all(versions_dir).ok(); + + // Derive repo_root: versions_dir is .oxen/versions, so two parents up. + let oxen_dir = versions_dir.parent().unwrap_or(Path::new(".")); + let repo_root = oxen_dir.parent().unwrap_or(Path::new(".")); + + let lfs_config = LfsConfig::load(oxen_dir)?; // --- Handshake --- // Phase 1: Git sends welcome + version(s) in one flush group. 
@@ -170,7 +177,12 @@ pub fn run_filter_process(versions_dir: &Path) -> Result<(), OxenError> { handle.block_on(filter::clean(versions_dir, &content)) })?, "smudge" => tokio::task::block_in_place(|| { - handle.block_on(filter::smudge(versions_dir, &lfs_config, &content)) + handle.block_on(filter::smudge( + versions_dir, + repo_root, + &lfs_config, + &content, + )) })?, other => { log::warn!("oxen lfs filter-process: unknown command '{other}', passing through"); diff --git a/oxen-rust/src/lib/src/lfs/sync.rs b/oxen-rust/src/lib/src/lfs/sync.rs index 9d45304a8..f8b120ecd 100644 --- a/oxen-rust/src/lib/src/lfs/sync.rs +++ b/oxen-rust/src/lib/src/lfs/sync.rs @@ -1,6 +1,8 @@ use std::path::Path; use crate::error::OxenError; +use crate::lfs::config::LfsConfig; +use crate::lfs::filter; use crate::lfs::pointer::PointerFile; use crate::lfs::status; use crate::storage::version_store::VersionStore; @@ -16,6 +18,8 @@ pub async fn push_to_remote( _args: &[String], ) -> Result<(), OxenError> { let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).ok(); + let statuses = status::get_status(repo_root, &versions_dir).await?; let to_push: Vec<_> = statuses.iter().filter(|s| s.local).collect(); @@ -44,9 +48,12 @@ pub async fn pull_from_remote( local_only: bool, ) -> Result<(), OxenError> { let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).ok(); + let statuses = status::get_status(repo_root, &versions_dir).await?; let store = LocalVersionStore::new(&versions_dir); + let lfs_config = LfsConfig::load(oxen_dir)?; let mut restored = 0u64; for file_status in &statuses { @@ -57,19 +64,99 @@ pub async fn pull_from_remote( .copy_version_to_path(&file_status.pointer.oid, &dest) .await?; restored += 1; - } else if !local_only { - // TODO (Phase 3): Fetch from remote, then restore. 
- log::warn!( - "oxen lfs pull: {} not available locally and remote fetch not yet implemented", - file_status.path.display() - ); + } else { + // Try smudge (which checks origin for local clones). + let pointer_data = file_status.pointer.encode(); + let result = + filter::smudge(&versions_dir, repo_root, &lfs_config, &pointer_data).await?; + if !PointerFile::is_pointer(&result) { + // Smudge resolved it — write to working tree. + let dest = repo_root.join(&file_status.path); + std::fs::write(&dest, &result)?; + restored += 1; + } else if !local_only { + // TODO (Phase 3): Fetch from remote, then restore. + log::warn!( + "oxen lfs pull: {} not available locally and remote fetch not yet implemented", + file_status.path.display() + ); + } } } if restored > 0 { - log::info!("oxen lfs pull: restored {restored} files"); + println!("oxen lfs pull: restored {restored} file(s)"); + } + + Ok(()) +} + +/// Force-synchronize ALL tracked pointer files in the working tree. +/// +/// For each pointer file that matches a tracked pattern: +/// 1. Try the local `.oxen/versions/` store. +/// 2. Try the origin's `.oxen/versions/` (for local clones). +/// 3. If any file cannot be resolved, return an error listing all failures. +/// +/// This is meant to be run explicitly by the user to guarantee every +/// pointer is replaced with actual content. 
+pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenError> { + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).ok(); + + let lfs_config = LfsConfig::load(oxen_dir)?; + let statuses = status::get_status(repo_root, &versions_dir).await?; + + if statuses.is_empty() { + println!("oxen lfs fetch-all: no tracked pointer files found"); + return Ok(()); + } + + let store = LocalVersionStore::new(&versions_dir); + let mut restored = 0u64; + let mut failures: Vec = Vec::new(); + + for file_status in &statuses { + let dest = repo_root.join(&file_status.path); + + if file_status.local { + // Available in local store — restore directly. + store + .copy_version_to_path(&file_status.pointer.oid, &dest) + .await?; + restored += 1; + println!(" restored: {}", file_status.path.display()); + continue; + } + + // Try smudge (which checks origin for local clones). + let pointer_data = file_status.pointer.encode(); + let result = filter::smudge(&versions_dir, repo_root, &lfs_config, &pointer_data).await?; + + if PointerFile::is_pointer(&result) { + // Could not resolve this pointer. 
+ failures.push(format!( + "{} (oid: {})", + file_status.path.display(), + file_status.pointer.oid + )); + } else { + std::fs::write(&dest, &result)?; + restored += 1; + println!(" restored: {}", file_status.path.display()); + } + } + + if !failures.is_empty() { + let msg = format!( + "oxen lfs fetch-all: {} file(s) could not be resolved:\n {}", + failures.len(), + failures.join("\n ") + ); + return Err(OxenError::basic_str(msg)); } + println!("oxen lfs fetch-all: all {restored} file(s) restored successfully"); Ok(()) } From 4c3a8d371209e2f32edde2204974c19c830ac5c9 Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Fri, 27 Feb 2026 11:42:41 -0800 Subject: [PATCH 6/9] oxen lfs fetch-all keeps git status clean --- oxen-rust/src/lib/src/lfs/sync.rs | 57 +++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/oxen-rust/src/lib/src/lfs/sync.rs b/oxen-rust/src/lib/src/lfs/sync.rs index f8b120ecd..4acb79f5d 100644 --- a/oxen-rust/src/lib/src/lfs/sync.rs +++ b/oxen-rust/src/lib/src/lfs/sync.rs @@ -1,4 +1,5 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; +use std::process::Command; use crate::error::OxenError; use crate::lfs::config::LfsConfig; @@ -54,7 +55,7 @@ pub async fn pull_from_remote( let store = LocalVersionStore::new(&versions_dir); let lfs_config = LfsConfig::load(oxen_dir)?; - let mut restored = 0u64; + let mut restored_paths: Vec = Vec::new(); for file_status in &statuses { if file_status.local { @@ -63,7 +64,7 @@ pub async fn pull_from_remote( store .copy_version_to_path(&file_status.pointer.oid, &dest) .await?; - restored += 1; + restored_paths.push(file_status.path.clone()); } else { // Try smudge (which checks origin for local clones). let pointer_data = file_status.pointer.encode(); @@ -73,7 +74,7 @@ pub async fn pull_from_remote( // Smudge resolved it — write to working tree. 
let dest = repo_root.join(&file_status.path); std::fs::write(&dest, &result)?; - restored += 1; + restored_paths.push(file_status.path.clone()); } else if !local_only { // TODO (Phase 3): Fetch from remote, then restore. log::warn!( @@ -84,8 +85,12 @@ pub async fn pull_from_remote( } } - if restored > 0 { - println!("oxen lfs pull: restored {restored} file(s)"); + if !restored_paths.is_empty() { + // Re-add restored files so Git's index stat cache reflects the new + // on-disk content. The clean filter produces the same pointer blob, + // so no actual index change occurs — only the stat cache is updated. + git_add(repo_root, &restored_paths); + println!("oxen lfs pull: restored {} file(s)", restored_paths.len()); } Ok(()) @@ -113,7 +118,7 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro } let store = LocalVersionStore::new(&versions_dir); - let mut restored = 0u64; + let mut restored_paths: Vec = Vec::new(); let mut failures: Vec = Vec::new(); for file_status in &statuses { @@ -124,7 +129,7 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro store .copy_version_to_path(&file_status.pointer.oid, &dest) .await?; - restored += 1; + restored_paths.push(file_status.path.clone()); println!(" restored: {}", file_status.path.display()); continue; } @@ -142,7 +147,7 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro )); } else { std::fs::write(&dest, &result)?; - restored += 1; + restored_paths.push(file_status.path.clone()); println!(" restored: {}", file_status.path.display()); } } @@ -156,10 +161,42 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro return Err(OxenError::basic_str(msg)); } - println!("oxen lfs fetch-all: all {restored} file(s) restored successfully"); + // Re-add restored files so Git's index stat cache reflects the new + // on-disk content. 
The clean filter produces the same pointer blob, + // so no actual index change occurs — only the stat cache is updated. + git_add(repo_root, &restored_paths); + + println!( + "oxen lfs fetch-all: all {} file(s) restored successfully", + restored_paths.len() + ); Ok(()) } +/// Run `git add` on a list of paths so Git's index stat cache is updated. +/// +/// After we replace a pointer file with real content, the on-disk size and +/// mtime change. Without re-adding, `git status` shows the files as modified +/// even though the clean filter produces the identical blob. Re-adding lets +/// Git refresh its stat cache. +fn git_add(repo_root: &Path, paths: &[PathBuf]) { + if paths.is_empty() { + return; + } + + let path_args: Vec<&str> = paths.iter().filter_map(|p| p.to_str()).collect(); + if path_args.is_empty() { + return; + } + + let mut cmd = Command::new("git"); + cmd.arg("add").args(&path_args).current_dir(repo_root); + + if let Err(e) = cmd.output() { + log::warn!("oxen lfs: failed to run git add to refresh index: {e}"); + } +} + /// Scan working tree for pointer files and return the list of OIDs /// that need to be pushed. pub async fn list_pushable_oids( From 59a80a9f631aff2047415e5a5b3a417f2c270408 Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Fri, 27 Feb 2026 12:49:45 -0800 Subject: [PATCH 7/9] DELETEME: todos --- TODO | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 000000000..2744ec874 --- /dev/null +++ b/TODO @@ -0,0 +1,2 @@ +- do `oxen lfs init` when doing a `git clone` on an oxen-enabled repository +- fix gaps (oxen lfs push) From e60220dd15fc97084064f7ed4a604a4e52cf85ed Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Fri, 27 Feb 2026 15:44:09 -0800 Subject: [PATCH 8/9] make it work with remote repos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.
lfs/config.rs — Added resolve_remote() - New async method that parses remote_url → RemoteRepository via api::client::repositories::get_by_url() - Returns Ok(None) when no remote is configured, errors if URL is set but repo doesn't exist 2. api/client/versions.rs — Generic download_versions_to_store() - Extracted the HTTP QUERY + gzip + tar extraction logic into try_download_versions_to_store() that takes &dyn VersionStore - Added public download_versions_to_store() with retry wrapper (same retry logic as original) - Refactored try_download_data_from_version_paths to delegate — zero behavior change for existing callers 3. lfs/sync.rs — git_add() error handling - Changed return type from () to Result<(), OxenError> - Propagates spawn errors, logs non-zero exit as warning (non-fatal) - Updated both call sites to use ? 4. lfs/sync.rs — Real push_to_remote() implementation - Loads LfsConfig, resolves remote — skips if no remote configured - Builds a temp staging dir with files hard-linked/copied from version store at their real repo-relative paths - Creates workspace → add_files → commit via workspace API - On error, attempts workspace cleanup via delete - Renamed _args → hook_args and logs for debugging 5. lfs/sync.rs — Real pull_from_remote() with remote download - After local + origin resolution, collects still-missing OIDs into need_remote - If !local_only and remote is configured, batch-downloads via download_versions_to_store() - Restores downloaded files to working tree and runs git_add() 6. lfs/sync.rs — fetch_all() updated for remote - After local+origin resolution, tries configured Oxen remote for unresolved pointers - Only errors if pointers remain unresolved AND no remote is available 7. lfs/filter.rs — Remote fetch in smudge() with 30s timeout - Renamed _lfs_config → lfs_config - After local + origin checks, attempts remote fetch wrapped in tokio::time::timeout(30s) - On success, reads from local store; on timeout/error, falls through to pointer fallback 8. 
lfs/filter_process.rs — Documented _caps - Added comment explaining why capabilities are read but unused 9. Tests (4 new, all passing) - test_push_no_remote_configured — succeeds silently with no remote - test_pull_local_only_no_network — restores local content, doesn't attempt network - test_git_add_returns_result — propagates errors properly - test_smudge_remote_fallback_on_no_server — falls back gracefully when remote unreachable Verification - cargo clippy --no-deps -- -D warnings — clean - cargo test --lib lfs — 44 tests passed, 0 failed --- oxen-rust/src/lib/src/api/client/versions.rs | 50 +++- oxen-rust/src/lib/src/lfs/config.rs | 19 ++ oxen-rust/src/lib/src/lfs/filter.rs | 74 +++++- oxen-rust/src/lib/src/lfs/filter_process.rs | 6 +- oxen-rust/src/lib/src/lfs/sync.rs | 253 +++++++++++++++++-- 5 files changed, 365 insertions(+), 37 deletions(-) diff --git a/oxen-rust/src/lib/src/api/client/versions.rs b/oxen-rust/src/lib/src/api/client/versions.rs index 024479ad1..1eceed6fd 100644 --- a/oxen-rust/src/lib/src/api/client/versions.rs +++ b/oxen-rust/src/lib/src/api/client/versions.rs @@ -5,6 +5,7 @@ use crate::constants::{max_retries, AVG_CHUNK_SIZE}; use crate::error::OxenError; use crate::model::entry::commit_entry::Entry; use crate::model::{LocalRepository, MerkleHash, RemoteRepository}; +use crate::storage::version_store::VersionStore; use crate::util::{self, concurrency, hasher}; use crate::view::versions::{ CleanCorruptedVersionsResponse, CompleteVersionUploadRequest, CompletedFileUpload, @@ -226,15 +227,56 @@ pub async fn try_download_data_from_version_paths( remote_repo: &RemoteRepository, hashes: &[String], local_repo: &LocalRepository, +) -> Result { + let version_store = local_repo.version_store()?; + try_download_versions_to_store(remote_repo, hashes, version_store.as_ref()).await +} + +/// Generic batch download of version blobs into any [`VersionStore`]. 
+/// +/// Sends the requested hashes to the server, receives a gzip+tar archive, +/// and streams each entry into `version_store` via `store_version_from_reader`. +pub async fn download_versions_to_store( + remote_repo: &RemoteRepository, + hashes: &[String], + version_store: &dyn VersionStore, +) -> Result { + let total_retries = max_retries().try_into().unwrap_or(max_retries() as u64); + let mut num_retries = 0; + + while num_retries < total_retries { + match try_download_versions_to_store(remote_repo, hashes, version_store).await { + Ok(val) => return Ok(val), + Err(OxenError::Authentication(val)) => return Err(OxenError::Authentication(val)), + Err(err) => { + num_retries += 1; + let sleep_time = num_retries * num_retries; + log::warn!("Could not download content {err:?} sleeping {sleep_time}"); + tokio::time::sleep(std::time::Duration::from_secs(sleep_time)).await; + } + } + } + + let err = format!( + "Err: Failed to download {} files after {} retries", + hashes.len(), + total_retries + ); + Err(OxenError::basic_str(err)) +} + +async fn try_download_versions_to_store( + remote_repo: &RemoteRepository, + hashes: &[String], + version_store: &dyn VersionStore, ) -> Result { let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); for hash in hashes.iter() { let line = format!("{hash}\n"); - // log::debug!("download_data_from_version_paths encoding line: {} path: {:?}", line, path); encoder.write_all(line.as_bytes())?; } let body = encoder.finish()?; - log::debug!("download_data_from_version_paths body len: {}", body.len()); + log::debug!("download_versions_to_store body len: {}", body.len()); let url = api::endpoint::url_from_repo(remote_repo, "/versions")?; let client = client::new_for_url(&url)?; @@ -254,7 +296,6 @@ pub async fn try_download_data_from_version_paths( let decoder = GzipDecoder::new(buf_reader); let mut archive = Archive::new(decoder); - let version_store = local_repo.version_store()?; let mut size: u64 = 0; // Iterate over archive 
entries and stream them to version store @@ -298,8 +339,7 @@ pub async fn try_download_data_from_version_paths( Ok(size) } else { - let err = - format!("api::entries::download_data_from_version_paths Err request failed: {url}"); + let err = format!("api::versions::download_versions_to_store Err request failed: {url}"); Err(OxenError::basic_str(err)) } } diff --git a/oxen-rust/src/lib/src/lfs/config.rs b/oxen-rust/src/lib/src/lfs/config.rs index 4be073a1c..a32c59d62 100644 --- a/oxen-rust/src/lib/src/lfs/config.rs +++ b/oxen-rust/src/lib/src/lfs/config.rs @@ -2,7 +2,9 @@ use std::path::Path; use serde::{Deserialize, Serialize}; +use crate::api; use crate::error::OxenError; +use crate::model::RemoteRepository; const LFS_CONFIG_FILENAME: &str = "lfs.toml"; @@ -25,6 +27,23 @@ impl LfsConfig { Ok(config) } + /// Resolve `remote_url` to a [`RemoteRepository`]. + /// + /// Returns `Ok(None)` when no remote is configured. Returns an error if + /// the URL is set but the repository cannot be found on the server. + pub async fn resolve_remote(&self) -> Result, OxenError> { + let url = match &self.remote_url { + Some(u) if !u.is_empty() => u, + _ => return Ok(None), + }; + match api::client::repositories::get_by_url(url).await? { + Some(repo) => Ok(Some(repo)), + None => Err(OxenError::basic_str(format!( + "oxen lfs: remote repository not found at {url}" + ))), + } + } + /// Persist to `/lfs.toml`. 
pub fn save(&self, oxen_dir: &Path) -> Result<(), OxenError> { let path = oxen_dir.join(LFS_CONFIG_FILENAME); diff --git a/oxen-rust/src/lib/src/lfs/filter.rs b/oxen-rust/src/lib/src/lfs/filter.rs index a175089da..be13be0a8 100644 --- a/oxen-rust/src/lib/src/lfs/filter.rs +++ b/oxen-rust/src/lib/src/lfs/filter.rs @@ -1,6 +1,8 @@ use std::path::{Path, PathBuf}; use std::process::Command; +use std::time::Duration; +use crate::api; use crate::error::OxenError; use crate::lfs::config::LfsConfig; use crate::lfs::pointer::PointerFile; @@ -32,11 +34,12 @@ pub async fn clean(versions_dir: &Path, content: &[u8]) -> Result, OxenE /// Strategy: /// 1. Local `.oxen/versions/` store. /// 2. Origin's `.oxen/versions/` (for local clones — discovered via `git config remote.origin.url`). -/// 3. Fallback: return pointer bytes unchanged with a warning. +/// 3. Configured Oxen remote (with 30 s timeout). +/// 4. Fallback: return pointer bytes unchanged with a warning. pub async fn smudge( versions_dir: &Path, repo_root: &Path, - _lfs_config: &LfsConfig, + lfs_config: &LfsConfig, pointer_data: &[u8], ) -> Result, OxenError> { // Not a pointer — return data as-is. @@ -67,7 +70,35 @@ pub async fn smudge( } } - // 3. TODO (Phase 3): fetch from Oxen remote with timeout. + // 3. Try the configured Oxen remote with a 30 s timeout. + if lfs_config.remote_url.is_some() { + match tokio::time::timeout( + Duration::from_secs(30), + try_fetch_from_remote(lfs_config, &pointer.oid, &store), + ) + .await + { + Ok(Ok(true)) => { + // Successfully downloaded — read from local store. + return store.get_version(&pointer.oid).await; + } + Ok(Ok(false)) => { + log::debug!("oxen lfs smudge: remote configured but fetch returned nothing"); + } + Ok(Err(e)) => { + log::warn!( + "oxen lfs smudge: remote fetch failed for {}: {e}", + pointer.oid + ); + } + Err(_) => { + log::warn!( + "oxen lfs smudge: remote fetch timed out for {}", + pointer.oid + ); + } + } + } // 4. Fallback — return pointer bytes and warn. 
log::warn!( @@ -77,6 +108,22 @@ pub async fn smudge( Ok(pointer_data.to_vec()) } +/// Attempt to download a single version from the configured remote. +/// Returns `Ok(true)` if the hash was successfully stored locally. +async fn try_fetch_from_remote( + lfs_config: &LfsConfig, + oid: &str, + store: &LocalVersionStore, +) -> Result { + let remote_repo = match lfs_config.resolve_remote().await? { + Some(r) => r, + None => return Ok(false), + }; + let hashes = vec![oid.to_string()]; + api::client::versions::download_versions_to_store(&remote_repo, &hashes, store).await?; + store.version_exists(oid).await +} + /// Discover the origin's `.oxen/versions/` directory for local clones. /// /// Returns `None` if the origin is a remote URL or doesn't have an `.oxen/versions/` dir. @@ -248,4 +295,25 @@ mod tests { let path_str = tmp.path().to_string_lossy().to_string(); assert_eq!(as_local_path(&path_str), Some(tmp.path().to_path_buf())); } + + #[tokio::test] + async fn test_smudge_remote_fallback_on_no_server() { + // When remote_url is set to an unreachable server, smudge should + // fall back gracefully to returning the pointer bytes. + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let versions_dir = tmp.path().join("versions"); + let config = LfsConfig { + remote_url: Some("http://127.0.0.1:19999/nonexistent/repo".to_string()), + }; + + let ptr = PointerFile::new("deadbeefdeadbeefdeadbeefdeadbeef", 42); + let pointer_bytes = ptr.encode(); + + let result = smudge(&versions_dir, repo_root, &config, &pointer_bytes) + .await + .unwrap(); + // Should fall back to returning the pointer unchanged. 
+ assert_eq!(result, pointer_bytes); + } } diff --git a/oxen-rust/src/lib/src/lfs/filter_process.rs b/oxen-rust/src/lib/src/lfs/filter_process.rs index abb002031..1ff79a7fb 100644 --- a/oxen-rust/src/lib/src/lfs/filter_process.rs +++ b/oxen-rust/src/lib/src/lfs/filter_process.rs @@ -139,7 +139,11 @@ pub fn run_filter_process(versions_dir: &Path) -> Result<(), OxenError> { pkt_line::write_flush(&mut writer)?; writer.flush()?; - // Phase 2: Git sends capabilities in one flush group. + // Phase 2: Git sends its capabilities (e.g. capability=clean, + // capability=smudge) in one flush group. We read and discard them + // because the protocol requires consuming this flush group before + // we can advertise our own capabilities. We unconditionally + // advertise both clean and smudge regardless of what Git offers. let _caps = pkt_line::read_text_pairs_until_flush(&mut reader)?; // Respond with the capabilities we support. diff --git a/oxen-rust/src/lib/src/lfs/sync.rs b/oxen-rust/src/lib/src/lfs/sync.rs index 4acb79f5d..957298997 100644 --- a/oxen-rust/src/lib/src/lfs/sync.rs +++ b/oxen-rust/src/lib/src/lfs/sync.rs @@ -1,40 +1,125 @@ use std::path::{Path, PathBuf}; use std::process::Command; +use crate::api; +use crate::config::UserConfig; +use crate::constants::DEFAULT_BRANCH_NAME; use crate::error::OxenError; use crate::lfs::config::LfsConfig; use crate::lfs::filter; use crate::lfs::pointer::PointerFile; use crate::lfs::status; +use crate::model::NewCommitBody; use crate::storage::version_store::VersionStore; use crate::storage::LocalVersionStore; /// Push large file versions to the configured Oxen remote. /// -/// Called by the pre-push hook. `_args` receives the hook arguments -/// (remote name and URL) passed by Git. +/// Called by the pre-push hook or the `oxen lfs push` CLI command. +/// `hook_args` receives the hook arguments (remote name and URL) passed +/// by Git; logged for debugging. 
pub async fn push_to_remote( repo_root: &Path, oxen_dir: &Path, - _args: &[String], + hook_args: &[String], ) -> Result<(), OxenError> { + log::debug!("oxen lfs push: hook_args={hook_args:?}"); + let versions_dir = oxen_dir.join("versions"); std::fs::create_dir_all(&versions_dir).ok(); - let statuses = status::get_status(repo_root, &versions_dir).await?; + let lfs_config = LfsConfig::load(oxen_dir)?; + let remote_repo = match lfs_config.resolve_remote().await? { + Some(r) => r, + None => { + log::info!("oxen lfs push: no remote configured, skipping"); + return Ok(()); + } + }; + let statuses = status::get_status(repo_root, &versions_dir).await?; let to_push: Vec<_> = statuses.iter().filter(|s| s.local).collect(); if to_push.is_empty() { log::info!("oxen lfs push: nothing to push"); return Ok(()); } - // TODO (Phase 3): Upload missing versions to the Oxen remote - // using the api::client infrastructure. - log::info!( - "oxen lfs push: {} files would be pushed (remote sync not yet implemented)", - to_push.len() + let store = LocalVersionStore::new(&versions_dir); + + // Build a temporary staging directory mirroring the files' real repo-relative paths. + // `add_files` expects absolute paths rooted under a common base directory. + let staging_dir = tempfile::tempdir().map_err(|e| { + OxenError::basic_str(format!("oxen lfs push: failed to create staging dir: {e}")) + })?; + + let mut staged_paths: Vec = Vec::new(); + for file_status in &to_push { + let src = store.get_version_path(&file_status.pointer.oid)?; + let dest = staging_dir.path().join(&file_status.path); + if let Some(parent) = dest.parent() { + std::fs::create_dir_all(parent)?; + } + // Prefer hard-link to avoid copying; fall back to copy. + if std::fs::hard_link(&src, &dest).is_err() { + std::fs::copy(&src, &dest)?; + } + staged_paths.push(dest); + } + + let workspace_id = uuid::Uuid::new_v4().to_string(); + + // Create workspace → upload files → commit. On any error, attempt cleanup. 
+ let result = push_workspace( + &remote_repo, + &workspace_id, + staging_dir.path(), + &staged_paths, + ) + .await; + + if let Err(ref e) = result { + log::warn!("oxen lfs push: push failed ({e}), cleaning up workspace"); + if let Err(del_err) = api::client::workspaces::delete(&remote_repo, &workspace_id).await { + log::warn!("oxen lfs push: workspace cleanup failed: {del_err}"); + } + } + + result?; + + println!( + "oxen lfs push: uploaded {} file(s) to {}", + to_push.len(), + remote_repo.url() ); + Ok(()) +} + +/// Inner helper: create workspace, add files, commit. +async fn push_workspace( + remote_repo: &crate::model::RemoteRepository, + workspace_id: &str, + staging_dir: &Path, + staged_paths: &[PathBuf], +) -> Result<(), OxenError> { + api::client::workspaces::create(remote_repo, DEFAULT_BRANCH_NAME, workspace_id).await?; + + api::client::workspaces::files::add_files( + remote_repo, + workspace_id, + staging_dir, + staged_paths.to_vec(), + ) + .await?; + + let user_config = UserConfig::get()?; + let body = NewCommitBody { + message: "oxen lfs push: sync large files".to_string(), + author: user_config.name, + email: user_config.email, + }; + + api::client::workspaces::commits::commit(remote_repo, DEFAULT_BRANCH_NAME, workspace_id, &body) + .await?; Ok(()) } @@ -56,6 +141,7 @@ pub async fn pull_from_remote( let store = LocalVersionStore::new(&versions_dir); let lfs_config = LfsConfig::load(oxen_dir)?; let mut restored_paths: Vec = Vec::new(); + let mut need_remote: Vec<&status::LfsFileStatus> = Vec::new(); for file_status in &statuses { if file_status.local { @@ -76,10 +162,31 @@ pub async fn pull_from_remote( std::fs::write(&dest, &result)?; restored_paths.push(file_status.path.clone()); } else if !local_only { - // TODO (Phase 3): Fetch from remote, then restore. + need_remote.push(file_status); + } + } + } + + // Batch-download any remaining files from the Oxen remote. 
+ if !need_remote.is_empty() { + if let Some(remote_repo) = lfs_config.resolve_remote().await? { + let hashes: Vec = need_remote.iter().map(|s| s.pointer.oid.clone()).collect(); + api::client::versions::download_versions_to_store(&remote_repo, &hashes, &store) + .await?; + + // Restore the now-downloaded files to the working tree. + for file_status in &need_remote { + let dest = repo_root.join(&file_status.path); + store + .copy_version_to_path(&file_status.pointer.oid, &dest) + .await?; + restored_paths.push(file_status.path.clone()); + } + } else { + for s in &need_remote { log::warn!( - "oxen lfs pull: {} not available locally and remote fetch not yet implemented", - file_status.path.display() + "oxen lfs pull: {} not available locally and no remote configured", + s.path.display() ); } } @@ -89,7 +196,7 @@ pub async fn pull_from_remote( // Re-add restored files so Git's index stat cache reflects the new // on-disk content. The clean filter produces the same pointer blob, // so no actual index change occurs — only the stat cache is updated. - git_add(repo_root, &restored_paths); + git_add(repo_root, &restored_paths)?; println!("oxen lfs pull: restored {} file(s)", restored_paths.len()); } @@ -101,7 +208,8 @@ pub async fn pull_from_remote( /// For each pointer file that matches a tracked pattern: /// 1. Try the local `.oxen/versions/` store. /// 2. Try the origin's `.oxen/versions/` (for local clones). -/// 3. If any file cannot be resolved, return an error listing all failures. +/// 3. Try the configured Oxen remote. +/// 4. If any file still cannot be resolved, return an error listing all failures. /// /// This is meant to be run explicitly by the user to guarantee every /// pointer is replaced with actual content. 
@@ -119,7 +227,7 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro let store = LocalVersionStore::new(&versions_dir); let mut restored_paths: Vec = Vec::new(); - let mut failures: Vec = Vec::new(); + let mut unresolved: Vec<&status::LfsFileStatus> = Vec::new(); for file_status in &statuses { let dest = repo_root.join(&file_status.path); @@ -139,12 +247,7 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro let result = filter::smudge(&versions_dir, repo_root, &lfs_config, &pointer_data).await?; if PointerFile::is_pointer(&result) { - // Could not resolve this pointer. - failures.push(format!( - "{} (oid: {})", - file_status.path.display(), - file_status.pointer.oid - )); + unresolved.push(file_status); } else { std::fs::write(&dest, &result)?; restored_paths.push(file_status.path.clone()); @@ -152,7 +255,30 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro } } - if !failures.is_empty() { + // Try the configured Oxen remote for any remaining unresolved pointers. + if !unresolved.is_empty() { + if let Some(remote_repo) = lfs_config.resolve_remote().await? 
{ + let hashes: Vec = unresolved.iter().map(|s| s.pointer.oid.clone()).collect(); + api::client::versions::download_versions_to_store(&remote_repo, &hashes, &store) + .await?; + + for file_status in &unresolved { + let dest = repo_root.join(&file_status.path); + store + .copy_version_to_path(&file_status.pointer.oid, &dest) + .await?; + restored_paths.push(file_status.path.clone()); + println!(" restored (remote): {}", file_status.path.display()); + } + unresolved.clear(); + } + } + + if !unresolved.is_empty() { + let failures: Vec = unresolved + .iter() + .map(|s| format!("{} (oid: {})", s.path.display(), s.pointer.oid)) + .collect(); let msg = format!( "oxen lfs fetch-all: {} file(s) could not be resolved:\n {}", failures.len(), @@ -164,7 +290,7 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro // Re-add restored files so Git's index stat cache reflects the new // on-disk content. The clean filter produces the same pointer blob, // so no actual index change occurs — only the stat cache is updated. - git_add(repo_root, &restored_paths); + git_add(repo_root, &restored_paths)?; println!( "oxen lfs fetch-all: all {} file(s) restored successfully", @@ -179,22 +305,29 @@ pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenErro /// mtime change. Without re-adding, `git status` shows the files as modified /// even though the clean filter produces the identical blob. Re-adding lets /// Git refresh its stat cache. 
-fn git_add(repo_root: &Path, paths: &[PathBuf]) { +fn git_add(repo_root: &Path, paths: &[PathBuf]) -> Result<(), OxenError> { if paths.is_empty() { - return; + return Ok(()); } let path_args: Vec<&str> = paths.iter().filter_map(|p| p.to_str()).collect(); if path_args.is_empty() { - return; + return Ok(()); } let mut cmd = Command::new("git"); cmd.arg("add").args(&path_args).current_dir(repo_root); - if let Err(e) = cmd.output() { - log::warn!("oxen lfs: failed to run git add to refresh index: {e}"); + let output = cmd + .output() + .map_err(|e| OxenError::basic_str(format!("oxen lfs: failed to spawn git add: {e}")))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + log::warn!("oxen lfs: git add exited with {}: {stderr}", output.status); } + + Ok(()) } /// Scan working tree for pointer files and return the list of OIDs @@ -211,3 +344,67 @@ pub async fn list_pushable_oids( .map(|s| s.pointer) .collect()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::lfs::filter; + use crate::lfs::gitattributes; + use tempfile::TempDir; + + #[tokio::test] + async fn test_push_no_remote_configured() { + // With no remote_url in lfs.toml, push should succeed silently. + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let oxen_dir = repo_root.join(".oxen"); + std::fs::create_dir_all(&oxen_dir).unwrap(); + + // Save config with no remote. + let cfg = LfsConfig::default(); + cfg.save(&oxen_dir).unwrap(); + + let result = push_to_remote(repo_root, &oxen_dir, &[]).await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_pull_local_only_no_network() { + // local_only pull should not attempt network calls; it should + // restore files that are in the local store and skip the rest. 
+ let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let oxen_dir = repo_root.join(".oxen"); + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).unwrap(); + + // Track *.bin and create a pointer file whose content IS local. + gitattributes::track_pattern(repo_root, "*.bin").unwrap(); + let content = b"local binary content"; + let pointer_bytes = filter::clean(&versions_dir, content).await.unwrap(); + std::fs::write(repo_root.join("data.bin"), &pointer_bytes).unwrap(); + + // Save default config (no remote). + LfsConfig::default().save(&oxen_dir).unwrap(); + + let result = pull_from_remote(repo_root, &oxen_dir, true).await; + assert!(result.is_ok()); + + // The file should be restored to real content. + let on_disk = std::fs::read(repo_root.join("data.bin")).unwrap(); + assert_eq!(on_disk, content); + } + + #[tokio::test] + async fn test_git_add_returns_result() { + // git_add on an empty list should be Ok. + let tmp = TempDir::new().unwrap(); + let result = git_add(tmp.path(), &[]); + assert!(result.is_ok()); + + // git_add on a path in a non-git dir should still return Ok + // (git add will fail but we only warn on non-zero exit). 
+ let result = git_add(tmp.path(), &[PathBuf::from("nonexistent.txt")]); + assert!(result.is_ok()); + } +} From 23230c642c11c80275f0f876370dff651d6846e1 Mon Sep 17 00:00:00 2001 From: Malcolm Greaves Date: Thu, 26 Mar 2026 13:03:23 -0700 Subject: [PATCH 9/9] in-progress docs --- oxen-rust/docs/dev/OxenLfsBranchSummary.md | 113 +++++++++++++++++++++ oxen-rust/docs/dev/OxenLfsGitLfsParity.md | 89 ++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 oxen-rust/docs/dev/OxenLfsBranchSummary.md create mode 100644 oxen-rust/docs/dev/OxenLfsGitLfsParity.md diff --git a/oxen-rust/docs/dev/OxenLfsBranchSummary.md b/oxen-rust/docs/dev/OxenLfsBranchSummary.md new file mode 100644 index 000000000..95282c962 --- /dev/null +++ b/oxen-rust/docs/dev/OxenLfsBranchSummary.md @@ -0,0 +1,113 @@ +# `oxen lfs` — Git Integration: Branch Summary + +## What This Is + +A **drop-in replacement for `git lfs`** that stores large file content in Oxen's version store and syncs it to an Oxen server. Users keep using Git for version control while offloading large binary files to Oxen's infrastructure instead of GitHub's LFS. + +--- + +## How It Works + +### Architecture + +``` +Git Repository +├── .git/hooks/ +│ ├── pre-push → oxen lfs push +│ ├── post-checkout → oxen lfs pull --local +│ └── post-merge → oxen lfs pull --local +├── .gitattributes *.bin filter=oxen diff=oxen merge=oxen -text +├── .gitignore .oxen/ +├── .oxen/ +│ ├── lfs.toml remote_url = "https://hub.oxen.ai/ns/repo" +│ └── versions/ content-addressable store (xxh3 hashes) +│ └── //data +└── working tree + └── model.bin (pointer file in Git, real content on disk) +``` + +### Pointer Format + +``` +version https://oxen.ai/spec/v1 +oid xxh3:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8 +size 5242880 +``` + +Uses xxHash3-128 (fast, non-cryptographic) instead of git-lfs's SHA-256. + +### Key Data Flows + +**Clean (file -> pointer):** Git add/commit triggers the clean filter. 
Hashes content (xxHash3-128), stores blob in `.oxen/versions/`, returns a 3-line pointer (~100 bytes) that Git commits. + +**Smudge (pointer -> file):** Git checkout triggers the smudge filter. Tries 4 tiers: +1. Local `.oxen/versions/` store +2. Origin's `.oxen/versions/` (for local `git clone`) +3. Configured Oxen remote (HTTP, 30s timeout) +4. Fallback: return pointer bytes + warn + +**Push:** `pre-push` hook (or `oxen lfs push`) creates a temporary workspace on the Oxen server, uploads versioned blobs via `add_files` (handles batching + multipart), commits the workspace, cleans up. + +**Pull:** `post-checkout`/`post-merge` hooks (or `oxen lfs pull`) scan for pointer files, restore content from local -> origin -> remote, then `git add` to refresh the index stat cache. + +--- + +## All Files on This Branch + +### Library (`oxen-rust/src/lib/src/lfs/`) + +| File | Purpose | +|------|---------| +| `lfs.rs` | Module declaration (9 submodules) | +| `pointer.rs` | Pointer file encode/decode/validation (xxh3, 200-byte max) | +| `config.rs` | `.oxen/lfs.toml` load/save + `resolve_remote()` -> `RemoteRepository` | +| `gitattributes.rs` | `.gitattributes` track/untrack/list patterns | +| `install.rs` | Global `~/.gitconfig` filter driver install/uninstall | +| `hooks.rs` | `.git/hooks/` pre-push, post-checkout, post-merge (idempotent, preserves existing) | +| `filter.rs` | Clean filter (hash+store) and smudge filter (4-tier lookup with 30s remote timeout) | +| `filter_process.rs` | Git long-running filter protocol v2 (pkt-line, capability negotiation) | +| `status.rs` | Walk working tree, find pointers matching tracked patterns, check local availability | +| `sync.rs` | `push_to_remote` (workspace API), `pull_from_remote` (batch download), `fetch_all`, `git_add` | + +### CLI (`oxen-rust/src/cli/src/cmd/lfs/`) + +| Command | Purpose | +|---------|---------| +| `oxen lfs init [--remote URL]` | Initialize LFS in a git repo (creates .oxen/, hooks, .gitignore) | +| `oxen lfs 
install [--uninstall]` | Global filter driver in `~/.gitconfig` | +| `oxen lfs track <pattern>` | Add pattern to `.gitattributes` | +| `oxen lfs untrack <pattern>` | Remove pattern from `.gitattributes` | +| `oxen lfs push` | Upload versioned blobs to Oxen remote via workspace API | +| `oxen lfs pull [--local]` | Download + restore pointer files | +| `oxen lfs fetch-all` | Strict sync: errors if anything can't be resolved | +| `oxen lfs status` | Show tracked files + local/missing status | +| `oxen lfs clean` | Stdin->stdout clean filter for Git | +| `oxen lfs smudge` | Stdin->stdout smudge filter for Git | +| `oxen lfs filter-process` | Long-running filter process (pkt-line v2) | +| `oxen lfs env` | Print version, remote URL, versions dir, tracked patterns | + +### Modified Shared Code + +| File | Change | +|------|--------| +| `api/client/versions.rs` | Added `download_versions_to_store()` -- generic batch download to any `VersionStore` (refactored existing download to delegate, zero behavior change) | +| `constants.rs` | Added `OXEN_HIDDEN_DIR` constant | +| `lib.rs` / `cmd.rs` / `main.rs` | Registered lfs module and subcommands | + +--- + +## Tests + +44 LFS tests pass. Clippy clean. 
Coverage includes: +- Pointer serialization/deserialization/validation +- Config save/load/defaults +- `.gitattributes` manipulation (track, untrack, list, idempotency) +- Hook installation (creation, idempotency, preservation, permissions, path quoting) +- Global filter install/uninstall +- Clean filter (stores content, returns pointer, idempotent) +- Smudge filter (restores content, passthrough non-pointer, fallback on missing, remote fallback on unreachable server) +- pkt-line protocol (text/binary roundtrips, key=value pairs) +- Status detection (finds pointers matching patterns) +- Push with no remote (silent success) +- Pull local-only (no network, restores local content) +- `git_add` returns Result (empty list, non-git dir) diff --git a/oxen-rust/docs/dev/OxenLfsGitLfsParity.md b/oxen-rust/docs/dev/OxenLfsGitLfsParity.md new file mode 100644 index 000000000..79f16af6f --- /dev/null +++ b/oxen-rust/docs/dev/OxenLfsGitLfsParity.md @@ -0,0 +1,89 @@ +# `oxen lfs` vs `git lfs` — Parity Roadmap + +## Current State + +The `oxen lfs` integration is feature-complete for core local and remote workflows: clean/smudge filters, long-running filter process, git hooks, CLI commands, local clone support, and remote push/pull via the Oxen workspace API. + +--- + +## Remaining TODOs + +### From the `TODO` File + +1. **Auto-init on `git clone`** -- Detect `.gitattributes` with `filter=oxen` and auto-run `oxen lfs init` +2. 
**Fix gaps (oxen lfs push)** -- Vague; likely refers to edge cases + +### Missing Commands + +| Priority | Command | What It Does | Effort | +|----------|---------|-------------|--------| +| High | `lfs fetch` | Download objects without restoring (separate from `pull`) | Small | +| High | `lfs checkout` | Restore files from local cache only | Small (essentially `pull --local` as named command) | +| High | `lfs ls-files` | List all LFS-tracked files with their OIDs | Small (reuse `status::get_status`) | +| Medium | `lfs prune` | Delete unreferenced objects from `.oxen/versions/` | Medium (needs reachability analysis) | +| Medium | `lfs migrate import` | Rewrite history to convert large files to pointers | Large (needs `git filter-repo` integration) | +| Medium | `lfs migrate export` | Rewrite history to remove LFS, restore files inline | Large | +| Low | `lfs lock`/`unlock`/`locks` | File locking for binary assets | Large (needs server API) | +| Low | `lfs fsck` | Verify integrity of local objects | Small (hash each file, compare) | + +### Missing Features + +| Priority | Feature | Notes | +|----------|---------|-------| +| **High** | Skip re-uploading already-pushed files | Push doesn't check if remote already has a hash before uploading | +| **High** | Progress indicators | No progress bars during push/pull of large files | +| Medium | Per-branch/per-ref fetch | `fetch-all` downloads everything; no way to fetch for a specific ref | +| Medium | SSH transfer adapter | Only HTTP supported | +| Low | Custom transfer adapters | Extensibility for non-HTTP transports | +| Low | Custom merge driver | `merge=oxen` is declared in `.gitattributes` but no driver is implemented | +| Low | Deduplication / storage optimization | No chunking or dedup beyond content-addressing | + +--- + +## Intentional Divergences (Not Gaps) + +These are architectural decisions, not missing features: + +- **Hash**: xxHash3-128 vs SHA-256 -- speed over cryptographic guarantees +- **Server 
protocol**: Oxen workspace API vs git-lfs Batch API -- leverages existing Oxen infrastructure +- **Config**: `.oxen/lfs.toml` vs git config -- clean separation from git config namespace +- **Pointer namespace**: `oxen.ai/spec/v1` vs `git-lfs.github.com/spec/v1` + +--- + +## Full `git lfs` Command Coverage + +| `git lfs` Command | `oxen lfs` Equivalent | Status | +|-------------------|-----------------------|--------| +| `install` | `oxen lfs install` | Done | +| `uninstall` | `oxen lfs install --uninstall` | Done (flag, not separate command) | +| `track` | `oxen lfs track` | Done | +| `untrack` | `oxen lfs untrack` | Done | +| `push` | `oxen lfs push` | Done | +| `pull` | `oxen lfs pull` | Done | +| `fetch` | -- | Not implemented (separate from pull) | +| `checkout` | `oxen lfs pull --local` | Done (as flag, not separate command) | +| `status` | `oxen lfs status` | Done | +| `ls-files` | -- | Not implemented | +| `env` | `oxen lfs env` | Done | +| `clean` | `oxen lfs clean` | Done | +| `smudge` | `oxen lfs smudge` | Done | +| `filter-process` | `oxen lfs filter-process` | Done | +| `lock` / `unlock` | -- | Not implemented | +| `locks` | -- | Not implemented | +| `prune` | -- | Not implemented | +| `migrate import` | -- | Not implemented | +| `migrate export` | -- | Not implemented | +| `fsck` | -- | Not implemented | +| `clone` | -- | Not applicable (use `git clone` + `oxen lfs init`) | +| `dedup` | -- | Not implemented | +| `merge-driver` | -- | Not implemented | +| `logs` | -- | Not implemented | +| `pointer` | -- | Not implemented as CLI (library only) | + +### Additional `oxen lfs` Commands (No `git lfs` Equivalent) + +| Command | Purpose | +|---------|---------| +| `oxen lfs init [--remote URL]` | One-step repo setup (creates .oxen/, hooks, .gitignore, optional remote) | +| `oxen lfs fetch-all` | Strict sync: errors if any pointer can't be resolved (combines fetch + checkout + strict validation) |