diff --git a/TODO b/TODO new file mode 100644 index 000000000..2744ec874 --- /dev/null +++ b/TODO @@ -0,0 +1,2 @@ +- do `oxen lfs init` when doing a `git clone` on an oxen-enabled repository +- fix gaps (oxen lfs push) diff --git a/oxen-rust/docs/dev/OxenLfsBranchSummary.md b/oxen-rust/docs/dev/OxenLfsBranchSummary.md new file mode 100644 index 000000000..95282c962 --- /dev/null +++ b/oxen-rust/docs/dev/OxenLfsBranchSummary.md @@ -0,0 +1,113 @@ +# `oxen lfs` — Git Integration: Branch Summary + +## What This Is + +A **drop-in replacement for `git lfs`** that stores large file content in Oxen's version store and syncs it to an Oxen server. Users keep using Git for version control while offloading large binary files to Oxen's infrastructure instead of GitHub's LFS. + +--- + +## How It Works + +### Architecture + +``` +Git Repository +├── .git/hooks/ +│ ├── pre-push → oxen lfs push +│ ├── post-checkout → oxen lfs pull --local +│ └── post-merge → oxen lfs pull --local +├── .gitattributes *.bin filter=oxen diff=oxen merge=oxen -text +├── .gitignore .oxen/ +├── .oxen/ +│ ├── lfs.toml remote_url = "https://hub.oxen.ai/ns/repo" +│ └── versions/ content-addressable store (xxh3 hashes) +│ └── <prefix>/<suffix>/data +└── working tree + └── model.bin (pointer file in Git, real content on disk) +``` + +### Pointer Format + +``` +version https://oxen.ai/spec/v1 +oid xxh3:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8 +size 5242880 +``` + +Uses xxHash3-128 (fast, non-cryptographic) instead of git-lfs's SHA-256. + +### Key Data Flows + +**Clean (file -> pointer):** Git add/commit triggers the clean filter. Hashes content (xxHash3-128), stores blob in `.oxen/versions/`, returns a 3-line pointer (~100 bytes) that Git commits. + +**Smudge (pointer -> file):** Git checkout triggers the smudge filter. Tries 4 tiers: +1. Local `.oxen/versions/` store +2. Origin's `.oxen/versions/` (for local `git clone`) +3. Configured Oxen remote (HTTP, 30s timeout) +4.
Fallback: return pointer bytes + warn + +**Push:** `pre-push` hook (or `oxen lfs push`) creates a temporary workspace on the Oxen server, uploads versioned blobs via `add_files` (handles batching + multipart), commits the workspace, cleans up. + +**Pull:** `post-checkout`/`post-merge` hooks (or `oxen lfs pull`) scan for pointer files, restore content from local -> origin -> remote, then `git add` to refresh the index stat cache. + +--- + +## All Files on This Branch + +### Library (`oxen-rust/src/lib/src/lfs/`) + +| File | Purpose | +|------|---------| +| `lfs.rs` | Module declaration (9 submodules) | +| `pointer.rs` | Pointer file encode/decode/validation (xxh3, 200-byte max) | +| `config.rs` | `.oxen/lfs.toml` load/save + `resolve_remote()` -> `RemoteRepository` | +| `gitattributes.rs` | `.gitattributes` track/untrack/list patterns | +| `install.rs` | Global `~/.gitconfig` filter driver install/uninstall | +| `hooks.rs` | `.git/hooks/` pre-push, post-checkout, post-merge (idempotent, preserves existing) | +| `filter.rs` | Clean filter (hash+store) and smudge filter (4-tier lookup with 30s remote timeout) | +| `filter_process.rs` | Git long-running filter protocol v2 (pkt-line, capability negotiation) | +| `status.rs` | Walk working tree, find pointers matching tracked patterns, check local availability | +| `sync.rs` | `push_to_remote` (workspace API), `pull_from_remote` (batch download), `fetch_all`, `git_add` | + +### CLI (`oxen-rust/src/cli/src/cmd/lfs/`) + +| Command | Purpose | +|---------|---------| +| `oxen lfs init [--remote URL]` | Initialize LFS in a git repo (creates .oxen/, hooks, .gitignore) | +| `oxen lfs install [--uninstall]` | Global filter driver in `~/.gitconfig` | +| `oxen lfs track [pattern]` | Add pattern to `.gitattributes`; with no pattern, list the currently tracked patterns | +| `oxen lfs untrack <pattern>` | Remove pattern from `.gitattributes` | +| `oxen lfs push` | Upload versioned blobs to Oxen remote via workspace API | +| `oxen lfs pull [--local]` | Download + restore pointer files | +| `oxen lfs 
fetch-all` | Strict sync: errors if anything can't be resolved | +| `oxen lfs status` | Show tracked files + local/missing status | +| `oxen lfs clean` | Stdin->stdout clean filter for Git | +| `oxen lfs smudge` | Stdin->stdout smudge filter for Git | +| `oxen lfs filter-process` | Long-running filter process (pkt-line v2) | +| `oxen lfs env` | Print version, remote URL, versions dir, tracked patterns | + +### Modified Shared Code + +| File | Change | +|------|--------| +| `api/client/versions.rs` | Added `download_versions_to_store()` -- generic batch download to any `VersionStore` (refactored existing download to delegate, zero behavior change) | +| `constants.rs` | Added `LFS_CONFIG_FILENAME` and `LFS_VERSIONS_DIR` constants | +| `lib.rs` / `cmd.rs` / `main.rs` | Registered lfs module and subcommands | + +--- + +## Tests + +44 LFS tests pass. Clippy clean. Coverage includes: +- Pointer serialization/deserialization/validation +- Config save/load/defaults +- `.gitattributes` manipulation (track, untrack, list, idempotency) +- Hook installation (creation, idempotency, preservation, permissions, path quoting) +- Global filter install/uninstall +- Clean filter (stores content, returns pointer, idempotent) +- Smudge filter (restores content, passthrough non-pointer, fallback on missing, remote fallback on unreachable server) +- pkt-line protocol (text/binary roundtrips, key=value pairs) +- Status detection (finds pointers matching patterns) +- Push with no remote (silent success) +- Pull local-only (no network, restores local content) +- `git_add` returns Result (empty list, non-git dir) diff --git a/oxen-rust/docs/dev/OxenLfsGitLfsParity.md b/oxen-rust/docs/dev/OxenLfsGitLfsParity.md new file mode 100644 index 000000000..79f16af6f --- /dev/null +++ b/oxen-rust/docs/dev/OxenLfsGitLfsParity.md @@ -0,0 +1,89 @@ +# `oxen lfs` vs `git lfs` — Parity Roadmap + +## Current State + +The `oxen lfs` integration is feature-complete for core local and remote workflows: clean/smudge filters, long-running 
filter process, git hooks, CLI commands, local clone support, and remote push/pull via the Oxen workspace API. + +--- + +## Remaining TODOs + +### From the `TODO` File + +1. **Auto-init on `git clone`** -- Detect `.gitattributes` with `filter=oxen` and auto-run `oxen lfs init` +2. **Fix gaps (oxen lfs push)** -- Vague; likely refers to edge cases + +### Missing Commands + +| Priority | Command | What It Does | Effort | +|----------|---------|-------------|--------| +| High | `lfs fetch` | Download objects without restoring (separate from `pull`) | Small | +| High | `lfs checkout` | Restore files from local cache only | Small (essentially `pull --local` as named command) | +| High | `lfs ls-files` | List all LFS-tracked files with their OIDs | Small (reuse `status::get_status`) | +| Medium | `lfs prune` | Delete unreferenced objects from `.oxen/versions/` | Medium (needs reachability analysis) | +| Medium | `lfs migrate import` | Rewrite history to convert large files to pointers | Large (needs `git filter-repo` integration) | +| Medium | `lfs migrate export` | Rewrite history to remove LFS, restore files inline | Large | +| Low | `lfs lock`/`unlock`/`locks` | File locking for binary assets | Large (needs server API) | +| Low | `lfs fsck` | Verify integrity of local objects | Small (hash each file, compare) | + +### Missing Features + +| Priority | Feature | Notes | +|----------|---------|-------| +| **High** | Skip re-uploading already-pushed files | Push doesn't check if remote already has a hash before uploading | +| **High** | Progress indicators | No progress bars during push/pull of large files | +| Medium | Per-branch/per-ref fetch | `fetch-all` downloads everything; no way to fetch for a specific ref | +| Medium | SSH transfer adapter | Only HTTP supported | +| Low | Custom transfer adapters | Extensibility for non-HTTP transports | +| Low | Custom merge driver | `merge=oxen` is declared in `.gitattributes` but no driver is implemented | +| Low | 
Deduplication / storage optimization | No chunking or dedup beyond content-addressing | + +--- + +## Intentional Divergences (Not Gaps) + +These are architectural decisions, not missing features: + +- **Hash**: xxHash3-128 vs SHA-256 -- speed over cryptographic guarantees +- **Server protocol**: Oxen workspace API vs git-lfs Batch API -- leverages existing Oxen infrastructure +- **Config**: `.oxen/lfs.toml` vs git config -- clean separation from git config namespace +- **Pointer namespace**: `oxen.ai/spec/v1` vs `git-lfs.github.com/spec/v1` + +--- + +## Full `git lfs` Command Coverage + +| `git lfs` Command | `oxen lfs` Equivalent | Status | +|-------------------|-----------------------|--------| +| `install` | `oxen lfs install` | Done | +| `uninstall` | `oxen lfs install --uninstall` | Done (flag, not separate command) | +| `track` | `oxen lfs track` | Done | +| `untrack` | `oxen lfs untrack` | Done | +| `push` | `oxen lfs push` | Done | +| `pull` | `oxen lfs pull` | Done | +| `fetch` | -- | Not implemented (separate from pull) | +| `checkout` | `oxen lfs pull --local` | Done (as flag, not separate command) | +| `status` | `oxen lfs status` | Done | +| `ls-files` | -- | Not implemented | +| `env` | `oxen lfs env` | Done | +| `clean` | `oxen lfs clean` | Done | +| `smudge` | `oxen lfs smudge` | Done | +| `filter-process` | `oxen lfs filter-process` | Done | +| `lock` / `unlock` | -- | Not implemented | +| `locks` | -- | Not implemented | +| `prune` | -- | Not implemented | +| `migrate import` | -- | Not implemented | +| `migrate export` | -- | Not implemented | +| `fsck` | -- | Not implemented | +| `clone` | -- | Not applicable (use `git clone` + `oxen lfs init`) | +| `dedup` | -- | Not implemented | +| `merge-driver` | -- | Not implemented | +| `logs` | -- | Not implemented | +| `pointer` | -- | Not implemented as CLI (library only) | + +### Additional `oxen lfs` Commands (No `git lfs` Equivalent) + +| Command | Purpose | +|---------|---------| +| `oxen lfs init 
[--remote URL]` | One-step repo setup (creates .oxen/, hooks, .gitignore, optional remote) | +| `oxen lfs fetch-all` | Strict sync: errors if any pointer can't be resolved (combines fetch + checkout + strict validation) | diff --git a/oxen-rust/src/cli/src/cmd.rs b/oxen-rust/src/cli/src/cmd.rs index db35f679d..77827fd71 100644 --- a/oxen-rust/src/cli/src/cmd.rs +++ b/oxen-rust/src/cli/src/cmd.rs @@ -51,6 +51,9 @@ pub use info::InfoCmd; pub mod init; pub use init::InitCmd; +pub mod lfs; +pub use lfs::LfsCmd; + pub mod load; pub use load::LoadCmd; diff --git a/oxen-rust/src/cli/src/cmd/lfs.rs b/oxen-rust/src/cli/src/cmd/lfs.rs new file mode 100644 index 000000000..c89ba289b --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs.rs @@ -0,0 +1,107 @@ +pub mod clean; +pub use clean::LfsCleanCmd; + +pub mod env; +pub use env::LfsEnvCmd; + +pub mod fetch_all; +pub use fetch_all::LfsFetchAllCmd; + +pub mod filter_process; +pub use filter_process::LfsFilterProcessCmd; + +pub mod init; +pub use init::LfsInitCmd; + +pub mod install; +pub use install::LfsInstallCmd; + +pub mod pull; +pub use pull::LfsPullCmd; + +pub mod push; +pub use push::LfsPushCmd; + +pub mod smudge; +pub use smudge::LfsSmudgeCmd; + +pub mod status; +pub use status::LfsStatusCmd; + +pub mod track; +pub use track::LfsTrackCmd; + +pub mod untrack; +pub use untrack::LfsUntrackCmd; + +use async_trait::async_trait; +use clap::Command; + +use liboxen::error::OxenError; +use std::collections::HashMap; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "lfs"; +pub struct LfsCmd; + +#[async_trait] +impl RunCmd for LfsCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + let mut command = Command::new(NAME) + .about("Oxen large file storage (Git LFS replacement)") + .subcommand_required(true) + .arg_required_else_help(true); + + let sub_commands = Self::get_subcommands(); + for cmd in sub_commands.values() { + command = command.subcommand(cmd.args()); + } + command + } + + async fn run(&self, 
args: &clap::ArgMatches) -> Result<(), OxenError> { + let sub_commands = Self::get_subcommands(); + if let Some((name, sub_matches)) = args.subcommand() { + let Some(cmd) = sub_commands.get(name) else { + eprintln!("Unknown lfs subcommand {name}"); + return Err(OxenError::basic_str(format!( + "Unknown lfs subcommand {name}" + ))); + }; + + tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on(cmd.run(sub_matches)) + })?; + } + Ok(()) + } +} + +impl LfsCmd { + fn get_subcommands() -> HashMap> { + let commands: Vec> = vec![ + Box::new(LfsCleanCmd), + Box::new(LfsEnvCmd), + Box::new(LfsFetchAllCmd), + Box::new(LfsFilterProcessCmd), + Box::new(LfsInitCmd), + Box::new(LfsInstallCmd), + Box::new(LfsPullCmd), + Box::new(LfsPushCmd), + Box::new(LfsSmudgeCmd), + Box::new(LfsStatusCmd), + Box::new(LfsTrackCmd), + Box::new(LfsUntrackCmd), + ]; + let mut runners: HashMap> = HashMap::new(); + for cmd in commands { + runners.insert(cmd.name().to_string(), cmd); + } + runners + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/clean.rs b/oxen-rust/src/cli/src/cmd/lfs/clean.rs new file mode 100644 index 000000000..05f3f623b --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/clean.rs @@ -0,0 +1,53 @@ +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "clean"; +pub struct LfsCleanCmd; + +#[async_trait] +impl RunCmd for LfsCleanCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Clean filter for a single file (invoked by Git)") + .arg(Arg::new("separator").long("").hide(true)) + .arg( + Arg::new("file") + .help("Path to the file being cleaned") + .required(false), + ) + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let versions_dir = 
repo_root.join(OXEN_HIDDEN_DIR).join("versions"); + + // Read content from stdin. + let content = { + use std::io::Read; + let mut buf = Vec::new(); + std::io::stdin().read_to_end(&mut buf)?; + buf + }; + + let result = lfs::filter::clean(&versions_dir, &content).await?; + + // Write result to stdout. + { + use std::io::Write; + std::io::stdout().write_all(&result)?; + std::io::stdout().flush()?; + } + + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/env.rs b/oxen-rust/src/cli/src/cmd/lfs/env.rs new file mode 100644 index 000000000..7241e716d --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/env.rs @@ -0,0 +1,53 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "env"; +pub struct LfsEnvCmd; + +#[async_trait] +impl RunCmd for LfsEnvCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about("Show Oxen LFS environment and diagnostic info") + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + println!("oxen lfs environment"); + println!(" oxen version: {}", liboxen::constants::OXEN_VERSION); + + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + + if oxen_dir.exists() { + let config = lfs::config::LfsConfig::load(&oxen_dir)?; + println!( + " remote: {}", + config.remote_url.as_deref().unwrap_or("(not set)") + ); + println!(" versions dir: {}", oxen_dir.join("versions").display()); + + let patterns = lfs::gitattributes::list_tracked_patterns(&repo_root)?; + if patterns.is_empty() { + println!(" tracked patterns: (none)"); + } else { + println!(" tracked patterns:"); + for p in &patterns { + println!(" {p}"); + } + } + } else { + println!(" Oxen LFS not initialized in this repository."); + } + + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/fetch_all.rs 
b/oxen-rust/src/cli/src/cmd/lfs/fetch_all.rs new file mode 100644 index 000000000..cbe563e91 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/fetch_all.rs @@ -0,0 +1,37 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "fetch-all"; +pub struct LfsFetchAllCmd; + +#[async_trait] +impl RunCmd for LfsFetchAllCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about( + "Resolve and restore ALL tracked pointer files. Errors if any file cannot be resolved.", + ) + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + + if !oxen_dir.exists() { + return Err(OxenError::basic_str( + "Not an oxen lfs repository. Run `oxen lfs init` first.", + )); + } + + lfs::sync::fetch_all(&repo_root, &oxen_dir).await + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/filter_process.rs b/oxen-rust/src/cli/src/cmd/lfs/filter_process.rs new file mode 100644 index 000000000..623f0a76a --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/filter_process.rs @@ -0,0 +1,31 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "filter-process"; +pub struct LfsFilterProcessCmd; + +#[async_trait] +impl RunCmd for LfsFilterProcessCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about("Git long-running filter process (invoked by Git, not by users)") + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let versions_dir = repo_root.join(OXEN_HIDDEN_DIR).join("versions"); + + // Run the blocking filter process loop. 
+ lfs::filter_process::run_filter_process(&versions_dir)?; + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/init.rs b/oxen-rust/src/cli/src/cmd/lfs/init.rs new file mode 100644 index 000000000..54c17c66d --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/init.rs @@ -0,0 +1,102 @@ +use std::path::Path; + +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "init"; +pub struct LfsInitCmd; + +#[async_trait] +impl RunCmd for LfsInitCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Initialize Oxen LFS in the current Git repository") + .arg( + Arg::new("remote") + .long("remote") + .help("Oxen remote URL for push/pull of large files"), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + + // Verify we are in a git repository. + let git_dir = repo_root.join(".git"); + if !git_dir.exists() { + return Err(OxenError::basic_str( + "Not a git repository. Run `git init` first.", + )); + } + + // Create .oxen/ directory. + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + std::fs::create_dir_all(&oxen_dir)?; + + // Create versions/ directory. + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir)?; + + // Save LFS config. + let remote_url = args.get_one::("remote").cloned(); + let config = lfs::config::LfsConfig { remote_url }; + config.save(&oxen_dir)?; + + // Install hooks using the full path to the current oxen binary. + let oxen_bin = lfs::install::current_exe_path()?; + let oxen_path = std::path::Path::new(&oxen_bin); + lfs::hooks::install_hooks(&git_dir, oxen_path)?; + + // Add .oxen/ to .gitignore. 
+ ensure_gitignore(&repo_root)?; + + println!("Oxen LFS initialized in {}", repo_root.display()); + + // If .gitattributes already has tracked patterns (e.g. after clone), + // hint that the user should pull to restore large files. + let patterns = lfs::gitattributes::list_tracked_patterns(&repo_root)?; + if !patterns.is_empty() { + println!( + "Tracked patterns found — run `oxen lfs pull` or `oxen lfs fetch-all` to restore large files." + ); + } + + Ok(()) + } +} + +/// Ensure `.oxen/` is listed in `.gitignore`. +fn ensure_gitignore(repo_root: &Path) -> Result<(), OxenError> { + let gitignore = repo_root.join(".gitignore"); + let pattern = format!("{OXEN_HIDDEN_DIR}/"); + + let existing = if gitignore.exists() { + std::fs::read_to_string(&gitignore)? + } else { + String::new() + }; + + if existing.lines().any(|l| l.trim() == pattern) { + return Ok(()); + } + + let mut content = existing; + if !content.is_empty() && !content.ends_with('\n') { + content.push('\n'); + } + content.push_str(&pattern); + content.push('\n'); + + std::fs::write(&gitignore, content)?; + Ok(()) +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/install.rs b/oxen-rust/src/cli/src/cmd/lfs/install.rs new file mode 100644 index 000000000..1862fe8b8 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/install.rs @@ -0,0 +1,41 @@ +use async_trait::async_trait; +use clap::{Arg, ArgAction, Command}; + +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "install"; +pub struct LfsInstallCmd; + +#[async_trait] +impl RunCmd for LfsInstallCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Configure Git's global filter driver for Oxen LFS") + .arg( + Arg::new("uninstall") + .long("uninstall") + .help("Remove the global filter driver configuration") + .action(ArgAction::SetTrue), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + if args.get_flag("uninstall") { + 
lfs::install::uninstall_global_filter()?; + println!("Oxen LFS global filter uninstalled."); + } else { + let oxen_bin = lfs::install::current_exe_path()?; + let oxen_path = std::path::Path::new(&oxen_bin); + lfs::install::install_global_filter(oxen_path)?; + println!("Oxen LFS global filter installed (using {oxen_bin})."); + } + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/pull.rs b/oxen-rust/src/cli/src/cmd/lfs/pull.rs new file mode 100644 index 000000000..0fe6939ca --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/pull.rs @@ -0,0 +1,38 @@ +use async_trait::async_trait; +use clap::{Arg, ArgAction, Command}; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "pull"; +pub struct LfsPullCmd; + +#[async_trait] +impl RunCmd for LfsPullCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Pull and restore large files from Oxen remote or local store") + .arg( + Arg::new("local") + .long("local") + .help("Only restore from the local .oxen/versions/ store (no network)") + .action(ArgAction::SetTrue), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + let local_only = args.get_flag("local"); + + lfs::sync::pull_from_remote(&repo_root, &oxen_dir, local_only).await?; + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/push.rs b/oxen-rust/src/cli/src/cmd/lfs/push.rs new file mode 100644 index 000000000..1b086f990 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/push.rs @@ -0,0 +1,33 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "push"; +pub struct LfsPushCmd; + +#[async_trait] +impl RunCmd for LfsPushCmd { + fn name(&self) -> 
&str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about("Push large files to the configured Oxen remote") + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + + // Collect remaining args that were passed by the pre-push hook. + let hook_args: Vec = std::env::args().collect(); + + lfs::sync::push_to_remote(&repo_root, &oxen_dir, &hook_args).await?; + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/smudge.rs b/oxen-rust/src/cli/src/cmd/lfs/smudge.rs new file mode 100644 index 000000000..962d17b0f --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/smudge.rs @@ -0,0 +1,55 @@ +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "smudge"; +pub struct LfsSmudgeCmd; + +#[async_trait] +impl RunCmd for LfsSmudgeCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Smudge filter for a single file (invoked by Git)") + .arg(Arg::new("separator").long("").hide(true)) + .arg( + Arg::new("file") + .help("Path to the file being smudged") + .required(false), + ) + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + let versions_dir = oxen_dir.join("versions"); + let config = lfs::config::LfsConfig::load(&oxen_dir)?; + + // Read pointer data from stdin. + let pointer_data = { + use std::io::Read; + let mut buf = Vec::new(); + std::io::stdin().read_to_end(&mut buf)?; + buf + }; + + let result = lfs::filter::smudge(&versions_dir, &repo_root, &config, &pointer_data).await?; + + // Write result to stdout. 
+ { + use std::io::Write; + std::io::stdout().write_all(&result)?; + std::io::stdout().flush()?; + } + + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/status.rs b/oxen-rust/src/cli/src/cmd/lfs/status.rs new file mode 100644 index 000000000..e5d6be51c --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/status.rs @@ -0,0 +1,53 @@ +use async_trait::async_trait; +use clap::Command; + +use liboxen::constants::OXEN_HIDDEN_DIR; +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "status"; +pub struct LfsStatusCmd; + +#[async_trait] +impl RunCmd for LfsStatusCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME).about("Show status of Oxen LFS tracked files") + } + + async fn run(&self, _args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let oxen_dir = repo_root.join(OXEN_HIDDEN_DIR); + let versions_dir = oxen_dir.join("versions"); + + if !versions_dir.exists() { + println!("Oxen LFS not initialized. 
Run `oxen lfs init` first."); + return Ok(()); + } + + let statuses = lfs::status::get_status(&repo_root, &versions_dir).await?; + + if statuses.is_empty() { + println!("No LFS tracked files found."); + return Ok(()); + } + + for s in &statuses { + let local_indicator = if s.local { "local" } else { "missing" }; + println!( + "{} ({}, {} bytes, {})", + s.path.display(), + s.pointer.oid, + s.pointer.size, + local_indicator, + ); + } + + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/track.rs b/oxen-rust/src/cli/src/cmd/lfs/track.rs new file mode 100644 index 000000000..94e3d4a95 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/track.rs @@ -0,0 +1,51 @@ +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "track"; +pub struct LfsTrackCmd; + +#[async_trait] +impl RunCmd for LfsTrackCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Track a file pattern with Oxen LFS") + .arg( + Arg::new("pattern") + .help("File glob pattern to track (e.g. \"*.bin\", \"datasets/**\")") + .required(false), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + + match args.get_one::("pattern") { + Some(pattern) => { + lfs::gitattributes::track_pattern(&repo_root, pattern)?; + println!("Tracking \"{pattern}\""); + } + None => { + // No pattern: list currently tracked patterns. 
+ let patterns = lfs::gitattributes::list_tracked_patterns(&repo_root)?; + if patterns.is_empty() { + println!("No patterns tracked by Oxen LFS."); + } else { + println!("Patterns tracked by Oxen LFS:"); + for p in &patterns { + println!(" {p}"); + } + } + } + } + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/cmd/lfs/untrack.rs b/oxen-rust/src/cli/src/cmd/lfs/untrack.rs new file mode 100644 index 000000000..3a1c0f046 --- /dev/null +++ b/oxen-rust/src/cli/src/cmd/lfs/untrack.rs @@ -0,0 +1,38 @@ +use async_trait::async_trait; +use clap::{Arg, Command}; + +use liboxen::error::OxenError; +use liboxen::lfs; + +use crate::cmd::RunCmd; + +pub const NAME: &str = "untrack"; +pub struct LfsUntrackCmd; + +#[async_trait] +impl RunCmd for LfsUntrackCmd { + fn name(&self) -> &str { + NAME + } + + fn args(&self) -> Command { + Command::new(NAME) + .about("Stop tracking a file pattern with Oxen LFS") + .arg( + Arg::new("pattern") + .help("File glob pattern to untrack") + .required(true), + ) + } + + async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> { + let repo_root = std::env::current_dir()?; + let pattern = args + .get_one::("pattern") + .ok_or_else(|| OxenError::basic_str("pattern is required"))?; + + lfs::gitattributes::untrack_pattern(&repo_root, pattern)?; + println!("Untracking \"{pattern}\""); + Ok(()) + } +} diff --git a/oxen-rust/src/cli/src/main.rs b/oxen-rust/src/cli/src/main.rs index 71467c1c2..44bff438c 100644 --- a/oxen-rust/src/cli/src/main.rs +++ b/oxen-rust/src/cli/src/main.rs @@ -62,6 +62,7 @@ async fn async_main() -> ExitCode { Box::new(cmd::EmbeddingsCmd), Box::new(cmd::InfoCmd), Box::new(cmd::InitCmd), + Box::new(cmd::LfsCmd), Box::new(cmd::LoadCmd), Box::new(cmd::LogCmd), Box::new(cmd::LsCmd), diff --git a/oxen-rust/src/lib/src/api/client/versions.rs b/oxen-rust/src/lib/src/api/client/versions.rs index 024479ad1..1eceed6fd 100644 --- a/oxen-rust/src/lib/src/api/client/versions.rs +++ b/oxen-rust/src/lib/src/api/client/versions.rs @@ 
-5,6 +5,7 @@ use crate::constants::{max_retries, AVG_CHUNK_SIZE}; use crate::error::OxenError; use crate::model::entry::commit_entry::Entry; use crate::model::{LocalRepository, MerkleHash, RemoteRepository}; +use crate::storage::version_store::VersionStore; use crate::util::{self, concurrency, hasher}; use crate::view::versions::{ CleanCorruptedVersionsResponse, CompleteVersionUploadRequest, CompletedFileUpload, @@ -226,15 +227,56 @@ pub async fn try_download_data_from_version_paths( remote_repo: &RemoteRepository, hashes: &[String], local_repo: &LocalRepository, +) -> Result { + let version_store = local_repo.version_store()?; + try_download_versions_to_store(remote_repo, hashes, version_store.as_ref()).await +} + +/// Generic batch download of version blobs into any [`VersionStore`]. +/// +/// Sends the requested hashes to the server, receives a gzip+tar archive, +/// and streams each entry into `version_store` via `store_version_from_reader`. +pub async fn download_versions_to_store( + remote_repo: &RemoteRepository, + hashes: &[String], + version_store: &dyn VersionStore, +) -> Result { + let total_retries = max_retries().try_into().unwrap_or(max_retries() as u64); + let mut num_retries = 0; + + while num_retries < total_retries { + match try_download_versions_to_store(remote_repo, hashes, version_store).await { + Ok(val) => return Ok(val), + Err(OxenError::Authentication(val)) => return Err(OxenError::Authentication(val)), + Err(err) => { + num_retries += 1; + let sleep_time = num_retries * num_retries; + log::warn!("Could not download content {err:?} sleeping {sleep_time}"); + tokio::time::sleep(std::time::Duration::from_secs(sleep_time)).await; + } + } + } + + let err = format!( + "Err: Failed to download {} files after {} retries", + hashes.len(), + total_retries + ); + Err(OxenError::basic_str(err)) +} + +async fn try_download_versions_to_store( + remote_repo: &RemoteRepository, + hashes: &[String], + version_store: &dyn VersionStore, ) -> Result { let mut 
encoder = GzEncoder::new(Vec::new(), Compression::default()); for hash in hashes.iter() { let line = format!("{hash}\n"); - // log::debug!("download_data_from_version_paths encoding line: {} path: {:?}", line, path); encoder.write_all(line.as_bytes())?; } let body = encoder.finish()?; - log::debug!("download_data_from_version_paths body len: {}", body.len()); + log::debug!("download_versions_to_store body len: {}", body.len()); let url = api::endpoint::url_from_repo(remote_repo, "/versions")?; let client = client::new_for_url(&url)?; @@ -254,7 +296,6 @@ pub async fn try_download_data_from_version_paths( let decoder = GzipDecoder::new(buf_reader); let mut archive = Archive::new(decoder); - let version_store = local_repo.version_store()?; let mut size: u64 = 0; // Iterate over archive entries and stream them to version store @@ -298,8 +339,7 @@ pub async fn try_download_data_from_version_paths( Ok(size) } else { - let err = - format!("api::entries::download_data_from_version_paths Err request failed: {url}"); + let err = format!("api::versions::download_versions_to_store Err request failed: {url}"); Err(OxenError::basic_str(err)) } } diff --git a/oxen-rust/src/lib/src/constants.rs b/oxen-rust/src/lib/src/constants.rs index 244a0c7ed..40611b218 100644 --- a/oxen-rust/src/lib/src/constants.rs +++ b/oxen-rust/src/lib/src/constants.rs @@ -273,3 +273,9 @@ pub fn chunk_size() -> u64 { // Oxen request Id pub const OXEN_REQUEST_ID: &str = "x-oxen-request-id"; + +// LFS +/// Name of the LFS config file inside .oxen/ +pub const LFS_CONFIG_FILENAME: &str = "lfs.toml"; +/// Subdirectory under .oxen/ where large file versions are cached +pub const LFS_VERSIONS_DIR: &str = "versions"; diff --git a/oxen-rust/src/lib/src/lfs.rs b/oxen-rust/src/lib/src/lfs.rs new file mode 100644 index 000000000..33f16afe9 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs.rs @@ -0,0 +1,9 @@ +pub mod config; +pub mod filter; +pub mod filter_process; +pub mod gitattributes; +pub mod hooks; +pub mod 
install; +pub mod pointer; +pub mod status; +pub mod sync; diff --git a/oxen-rust/src/lib/src/lfs/config.rs b/oxen-rust/src/lib/src/lfs/config.rs new file mode 100644 index 000000000..a32c59d62 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/config.rs @@ -0,0 +1,79 @@ +use std::path::Path; + +use serde::{Deserialize, Serialize}; + +use crate::api; +use crate::error::OxenError; +use crate::model::RemoteRepository; + +const LFS_CONFIG_FILENAME: &str = "lfs.toml"; + +/// Configuration stored in `.oxen/lfs.toml` within a Git repository. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct LfsConfig { + /// Optional Oxen remote URL for push/pull of large file content. + pub remote_url: Option, +} + +impl LfsConfig { + /// Load from `/lfs.toml`. Returns defaults if the file does not exist. + pub fn load(oxen_dir: &Path) -> Result { + let path = oxen_dir.join(LFS_CONFIG_FILENAME); + if !path.exists() { + return Ok(Self::default()); + } + let text = std::fs::read_to_string(&path)?; + let config: LfsConfig = toml::from_str(&text).map_err(OxenError::TomlDe)?; + Ok(config) + } + + /// Resolve `remote_url` to a [`RemoteRepository`]. + /// + /// Returns `Ok(None)` when no remote is configured. Returns an error if + /// the URL is set but the repository cannot be found on the server. + pub async fn resolve_remote(&self) -> Result, OxenError> { + let url = match &self.remote_url { + Some(u) if !u.is_empty() => u, + _ => return Ok(None), + }; + match api::client::repositories::get_by_url(url).await? { + Some(repo) => Ok(Some(repo)), + None => Err(OxenError::basic_str(format!( + "oxen lfs: remote repository not found at {url}" + ))), + } + } + + /// Persist to `/lfs.toml`. 
+ pub fn save(&self, oxen_dir: &Path) -> Result<(), OxenError> { + let path = oxen_dir.join(LFS_CONFIG_FILENAME); + let text = toml::to_string_pretty(self).map_err(OxenError::TomlSer)?; + std::fs::write(&path, text)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_config_save_and_load() { + let tmp = TempDir::new().unwrap(); + let cfg = LfsConfig { + remote_url: Some("https://hub.oxen.ai/user/repo".to_string()), + }; + cfg.save(tmp.path()).unwrap(); + + let loaded = LfsConfig::load(tmp.path()).unwrap(); + assert_eq!(loaded.remote_url, cfg.remote_url); + } + + #[test] + fn test_config_load_defaults_when_missing() { + let tmp = TempDir::new().unwrap(); + let cfg = LfsConfig::load(tmp.path()).unwrap(); + assert!(cfg.remote_url.is_none()); + } +} diff --git a/oxen-rust/src/lib/src/lfs/filter.rs b/oxen-rust/src/lib/src/lfs/filter.rs new file mode 100644 index 000000000..be13be0a8 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/filter.rs @@ -0,0 +1,319 @@ +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::time::Duration; + +use crate::api; +use crate::error::OxenError; +use crate::lfs::config::LfsConfig; +use crate::lfs::pointer::PointerFile; +use crate::storage::version_store::VersionStore; +use crate::storage::LocalVersionStore; +use crate::util::hasher; + +/// Clean filter: hash content, store in version store, return pointer bytes. +/// +/// If the input is already a valid pointer it is returned unchanged (idempotent). +pub async fn clean(versions_dir: &Path, content: &[u8]) -> Result, OxenError> { + // Idempotent: don't re-clean a pointer. 
+ if PointerFile::is_pointer(content) { + return Ok(content.to_vec()); + } + + let hash = hasher::hash_buffer(content); + + let store = LocalVersionStore::new(versions_dir); + store.init().await?; + store.store_version(&hash, content).await?; + + let pointer = PointerFile::new(&hash, content.len() as u64); + Ok(pointer.encode()) +} + +/// Smudge filter: parse pointer, look up content in version store, return content. +/// +/// Strategy: +/// 1. Local `.oxen/versions/` store. +/// 2. Origin's `.oxen/versions/` (for local clones — discovered via `git config remote.origin.url`). +/// 3. Configured Oxen remote (with 30 s timeout). +/// 4. Fallback: return pointer bytes unchanged with a warning. +pub async fn smudge( + versions_dir: &Path, + repo_root: &Path, + lfs_config: &LfsConfig, + pointer_data: &[u8], +) -> Result, OxenError> { + // Not a pointer — return data as-is. + let pointer = match PointerFile::decode(pointer_data) { + Some(p) => p, + None => return Ok(pointer_data.to_vec()), + }; + + // Ensure versions dir exists (may be missing on a fresh clone). + std::fs::create_dir_all(versions_dir).ok(); + + let store = LocalVersionStore::new(versions_dir); + + // 1. Try local store. + if store.version_exists(&pointer.oid).await? { + return store.get_version(&pointer.oid).await; + } + + // 2. Try origin's version store (local clones only). + if let Some(origin_versions) = origin_versions_dir(repo_root) { + let origin_store = LocalVersionStore::new(&origin_versions); + if origin_store.version_exists(&pointer.oid).await? { + // Copy into our local store for future use. + let data = origin_store.get_version(&pointer.oid).await?; + store.init().await?; + store.store_version(&pointer.oid, &data).await?; + return Ok(data); + } + } + + // 3. Try the configured Oxen remote with a 30 s timeout. 
+ if lfs_config.remote_url.is_some() { + match tokio::time::timeout( + Duration::from_secs(30), + try_fetch_from_remote(lfs_config, &pointer.oid, &store), + ) + .await + { + Ok(Ok(true)) => { + // Successfully downloaded — read from local store. + return store.get_version(&pointer.oid).await; + } + Ok(Ok(false)) => { + log::debug!("oxen lfs smudge: remote configured but fetch returned nothing"); + } + Ok(Err(e)) => { + log::warn!( + "oxen lfs smudge: remote fetch failed for {}: {e}", + pointer.oid + ); + } + Err(_) => { + log::warn!( + "oxen lfs smudge: remote fetch timed out for {}", + pointer.oid + ); + } + } + } + + // 4. Fallback — return pointer bytes and warn. + log::warn!( + "oxen lfs smudge: content for {} not available locally; run `oxen lfs pull`", + pointer.oid, + ); + Ok(pointer_data.to_vec()) +} + +/// Attempt to download a single version from the configured remote. +/// Returns `Ok(true)` if the hash was successfully stored locally. +async fn try_fetch_from_remote( + lfs_config: &LfsConfig, + oid: &str, + store: &LocalVersionStore, +) -> Result { + let remote_repo = match lfs_config.resolve_remote().await? { + Some(r) => r, + None => return Ok(false), + }; + let hashes = vec![oid.to_string()]; + api::client::versions::download_versions_to_store(&remote_repo, &hashes, store).await?; + store.version_exists(oid).await +} + +/// Discover the origin's `.oxen/versions/` directory for local clones. +/// +/// Returns `None` if the origin is a remote URL or doesn't have an `.oxen/versions/` dir. +fn origin_versions_dir(repo_root: &Path) -> Option { + let url = get_origin_url(repo_root)?; + let origin_path = as_local_path(&url)?; + let versions = origin_path.join(".oxen").join("versions"); + if versions.is_dir() { + Some(versions) + } else { + None + } +} + +/// Run `git config remote.origin.url` in the given repo directory. 
+fn get_origin_url(repo_root: &Path) -> Option { + let output = Command::new("git") + .args(["config", "remote.origin.url"]) + .current_dir(repo_root) + .output() + .ok()?; + + if !output.status.success() { + return None; + } + + let url = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if url.is_empty() { + None + } else { + Some(url) + } +} + +/// Convert a Git remote URL to a local filesystem path, if it is one. +/// +/// Handles: +/// - Absolute paths: `/foo/bar` +/// - `file://` URLs: `file:///foo/bar` +/// +/// Returns `None` for remote URLs (ssh://, https://, git@, etc.). +fn as_local_path(url: &str) -> Option { + if let Some(stripped) = url.strip_prefix("file://") { + let path = PathBuf::from(stripped); + if path.is_dir() { + return Some(path); + } + return None; + } + + // Reject obvious remote URLs. + if url.contains("://") || url.contains('@') { + return None; + } + + let path = PathBuf::from(url); + if path.is_absolute() && path.is_dir() { + Some(path) + } else { + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn test_clean_stores_and_returns_pointer() { + let tmp = TempDir::new().unwrap(); + let versions_dir = tmp.path().join("versions"); + + let content = b"hello world, this is a large file"; + let result = clean(&versions_dir, content).await.unwrap(); + + // Result should be a valid pointer. + let ptr = PointerFile::decode(&result).expect("should be a pointer"); + assert_eq!(ptr.size, content.len() as u64); + + // Content should be in the store. 
+ let store = LocalVersionStore::new(&versions_dir); + assert!(store.version_exists(&ptr.oid).await.unwrap()); + let stored = store.get_version(&ptr.oid).await.unwrap(); + assert_eq!(stored, content); + } + + #[tokio::test] + async fn test_clean_is_idempotent() { + let tmp = TempDir::new().unwrap(); + let versions_dir = tmp.path().join("versions"); + + let content = b"some content"; + let pointer_bytes = clean(&versions_dir, content).await.unwrap(); + + // Cleaning the pointer again should return it unchanged. + let double = clean(&versions_dir, &pointer_bytes).await.unwrap(); + assert_eq!(pointer_bytes, double); + } + + #[tokio::test] + async fn test_smudge_restores_content() { + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let versions_dir = tmp.path().join("versions"); + let config = LfsConfig::default(); + + let content = b"restore me"; + let pointer_bytes = clean(&versions_dir, content).await.unwrap(); + + let restored = smudge(&versions_dir, repo_root, &config, &pointer_bytes) + .await + .unwrap(); + assert_eq!(restored, content); + } + + #[tokio::test] + async fn test_smudge_passthrough_non_pointer() { + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let versions_dir = tmp.path().join("versions"); + let config = LfsConfig::default(); + + let data = b"not a pointer"; + let result = smudge(&versions_dir, repo_root, &config, data) + .await + .unwrap(); + assert_eq!(result, data); + } + + #[tokio::test] + async fn test_smudge_fallback_when_missing() { + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let versions_dir = tmp.path().join("versions"); + let config = LfsConfig::default(); + + // Fabricate a pointer whose content is NOT in the store. + let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 999); + let pointer_bytes = ptr.encode(); + + let result = smudge(&versions_dir, repo_root, &config, &pointer_bytes) + .await + .unwrap(); + // Falls back to returning the pointer bytes. 
+ assert_eq!(result, pointer_bytes); + } + + #[test] + fn test_as_local_path_rejects_ssh() { + assert!(as_local_path("git@github.com:user/repo.git").is_none()); + } + + #[test] + fn test_as_local_path_rejects_https() { + assert!(as_local_path("https://github.com/user/repo.git").is_none()); + } + + #[test] + fn test_as_local_path_accepts_file_url() { + let tmp = TempDir::new().unwrap(); + let url = format!("file://{}", tmp.path().display()); + assert_eq!(as_local_path(&url), Some(tmp.path().to_path_buf())); + } + + #[test] + fn test_as_local_path_accepts_absolute_path() { + let tmp = TempDir::new().unwrap(); + let path_str = tmp.path().to_string_lossy().to_string(); + assert_eq!(as_local_path(&path_str), Some(tmp.path().to_path_buf())); + } + + #[tokio::test] + async fn test_smudge_remote_fallback_on_no_server() { + // When remote_url is set to an unreachable server, smudge should + // fall back gracefully to returning the pointer bytes. + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let versions_dir = tmp.path().join("versions"); + let config = LfsConfig { + remote_url: Some("http://127.0.0.1:19999/nonexistent/repo".to_string()), + }; + + let ptr = PointerFile::new("deadbeefdeadbeefdeadbeefdeadbeef", 42); + let pointer_bytes = ptr.encode(); + + let result = smudge(&versions_dir, repo_root, &config, &pointer_bytes) + .await + .unwrap(); + // Should fall back to returning the pointer unchanged. + assert_eq!(result, pointer_bytes); + } +} diff --git a/oxen-rust/src/lib/src/lfs/filter_process.rs b/oxen-rust/src/lib/src/lfs/filter_process.rs new file mode 100644 index 000000000..1ff79a7fb --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/filter_process.rs @@ -0,0 +1,269 @@ +use std::io::{self, BufReader, BufWriter, Write}; +use std::path::Path; + +use crate::error::OxenError; +use crate::lfs::config::LfsConfig; +use crate::lfs::filter; + +/// pkt-line helpers for the Git long-running filter protocol. 
+pub mod pkt_line { + use std::io::{self, BufRead, Write}; + + const MAX_PKT_PAYLOAD: usize = 65516; + + /// Write a pkt-line text packet (adds newline automatically). + pub fn write_text(w: &mut impl Write, text: &str) -> io::Result<()> { + let payload = format!("{text}\n"); + let len = payload.len() + 4; // 4-byte length prefix + write!(w, "{len:04x}{payload}")?; + Ok(()) + } + + /// Write binary data as one or more pkt-line packets. + pub fn write_binary(w: &mut impl Write, data: &[u8]) -> io::Result<()> { + for chunk in data.chunks(MAX_PKT_PAYLOAD) { + let len = chunk.len() + 4; + write!(w, "{len:04x}")?; + w.write_all(chunk)?; + } + Ok(()) + } + + /// Write a flush packet (0000). + pub fn write_flush(w: &mut impl Write) -> io::Result<()> { + w.write_all(b"0000")?; + Ok(()) + } + + /// Read one pkt-line packet. Returns `None` on flush (0000) or EOF. + pub fn read_packet(r: &mut impl BufRead) -> io::Result>> { + let mut len_buf = [0u8; 4]; + match r.read_exact(&mut len_buf) { + Ok(()) => {} + Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), + Err(e) => return Err(e), + } + + let len_str = std::str::from_utf8(&len_buf) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid pkt-line length"))?; + + let len = usize::from_str_radix(len_str, 16) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid pkt-line hex"))?; + + if len == 0 { + // Flush packet. + return Ok(None); + } + + if len < 4 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "pkt-line length too small", + )); + } + + let payload_len = len - 4; + let mut payload = vec![0u8; payload_len]; + r.read_exact(&mut payload)?; + Ok(Some(payload)) + } + + /// Read all packets until flush, concatenating their payloads. + pub fn read_until_flush(r: &mut impl BufRead) -> io::Result> { + let mut result = Vec::new(); + while let Some(pkt) = read_packet(r)? { + result.extend_from_slice(&pkt); + } + Ok(result) + } + + /// Read all text lines until flush. 
Returns each line trimmed of trailing newline. + pub fn read_lines_until_flush(r: &mut impl BufRead) -> io::Result> { + let mut lines = Vec::new(); + while let Some(pkt) = read_packet(r)? { + let text = String::from_utf8_lossy(&pkt); + lines.push(text.trim_end_matches('\n').to_string()); + } + Ok(lines) + } + + /// Read text key=value pairs until flush (lines without `=` are skipped). + pub fn read_text_pairs_until_flush(r: &mut impl BufRead) -> io::Result> { + let lines = read_lines_until_flush(r)?; + let pairs = lines + .into_iter() + .filter_map(|line| { + line.split_once('=') + .map(|(k, v)| (k.to_string(), v.to_string())) + }) + .collect(); + Ok(pairs) + } +} + +/// Run the long-running Git filter process on stdin/stdout. +/// +/// This implements the protocol described in `gitattributes(5)` under +/// "Long Running Filter Process". +pub fn run_filter_process(versions_dir: &Path) -> Result<(), OxenError> { + let stdin = io::stdin(); + let stdout = io::stdout(); + let mut reader = BufReader::new(stdin.lock()); + let mut writer = BufWriter::new(stdout.lock()); + + // Ensure versions dir exists (may be missing on a fresh clone). + std::fs::create_dir_all(versions_dir).ok(); + + // Derive repo_root: versions_dir is .oxen/versions, so two parents up. + let oxen_dir = versions_dir.parent().unwrap_or(Path::new(".")); + let repo_root = oxen_dir.parent().unwrap_or(Path::new(".")); + + let lfs_config = LfsConfig::load(oxen_dir)?; + + // --- Handshake --- + // Phase 1: Git sends welcome + version(s) in one flush group. + // packet: git-filter-client\n + // packet: version=2\n + // packet: 0000 + let welcome_lines = pkt_line::read_lines_until_flush(&mut reader)?; + + if !welcome_lines.iter().any(|l| l == "git-filter-client") { + return Err(OxenError::basic_str( + "expected git-filter-client in handshake", + )); + } + + // Respond with welcome + chosen version in one flush group. 
+ pkt_line::write_text(&mut writer, "git-filter-server")?; + pkt_line::write_text(&mut writer, "version=2")?; + pkt_line::write_flush(&mut writer)?; + writer.flush()?; + + // Phase 2: Git sends its capabilities (e.g. capability=clean, + // capability=smudge) in one flush group. We read and discard them + // because the protocol requires consuming this flush group before + // we can advertise our own capabilities. We unconditionally + // advertise both clean and smudge regardless of what Git offers. + let _caps = pkt_line::read_text_pairs_until_flush(&mut reader)?; + + // Respond with the capabilities we support. + pkt_line::write_text(&mut writer, "capability=clean")?; + pkt_line::write_text(&mut writer, "capability=smudge")?; + pkt_line::write_flush(&mut writer)?; + writer.flush()?; + + // Get a handle to the current tokio runtime. The CLI's main() already + // starts one, so we must not create a second. We use block_in_place + + // block_on to run async version-store ops from this synchronous context. + let handle = tokio::runtime::Handle::current(); + + // --- Per-file loop --- + loop { + // Read command + pathname (key=value pairs until flush). + let pairs = match pkt_line::read_text_pairs_until_flush(&mut reader) { + Ok(p) if p.is_empty() => break, // EOF / no more commands + Ok(p) => p, + Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break, + Err(e) => return Err(OxenError::IO(e)), + }; + + let command = pairs + .iter() + .find(|(k, _)| k == "command") + .map(|(_, v)| v.as_str()) + .unwrap_or(""); + + // Read content until flush. 
+ let content = pkt_line::read_until_flush(&mut reader)?; + + let result = match command { + "clean" => tokio::task::block_in_place(|| { + handle.block_on(filter::clean(versions_dir, &content)) + })?, + "smudge" => tokio::task::block_in_place(|| { + handle.block_on(filter::smudge( + versions_dir, + repo_root, + &lfs_config, + &content, + )) + })?, + other => { + log::warn!("oxen lfs filter-process: unknown command '{other}', passing through"); + content + } + }; + + // Write status=success, flush, content, flush, flush. + pkt_line::write_text(&mut writer, "status=success")?; + pkt_line::write_flush(&mut writer)?; + pkt_line::write_binary(&mut writer, &result)?; + pkt_line::write_flush(&mut writer)?; + pkt_line::write_flush(&mut writer)?; + writer.flush()?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::pkt_line::*; + use std::io::Cursor; + + #[test] + fn test_pkt_line_roundtrip_text() { + let mut buf = Vec::new(); + write_text(&mut buf, "hello").unwrap(); + write_flush(&mut buf).unwrap(); + + let mut reader = Cursor::new(buf); + let pkt = read_packet(&mut reader).unwrap().unwrap(); + assert_eq!(String::from_utf8_lossy(&pkt), "hello\n"); + + // Next read should be flush => None + let flush = read_packet(&mut reader).unwrap(); + assert!(flush.is_none()); + } + + #[test] + fn test_pkt_line_binary() { + let data = vec![0u8; 100]; + let mut buf = Vec::new(); + write_binary(&mut buf, &data).unwrap(); + write_flush(&mut buf).unwrap(); + + let mut reader = Cursor::new(buf); + let result = read_until_flush(&mut reader).unwrap(); + assert_eq!(result, data); + } + + #[test] + fn test_read_text_pairs() { + let mut buf = Vec::new(); + write_text(&mut buf, "command=clean").unwrap(); + write_text(&mut buf, "pathname=test.bin").unwrap(); + write_flush(&mut buf).unwrap(); + + let mut reader = Cursor::new(buf); + let pairs = read_text_pairs_until_flush(&mut reader).unwrap(); + assert_eq!(pairs.len(), 2); + assert_eq!(pairs[0], ("command".to_string(), 
"clean".to_string())); + assert_eq!(pairs[1], ("pathname".to_string(), "test.bin".to_string())); + } + + #[test] + fn test_read_lines_includes_non_pairs() { + // Git sends "git-filter-client" (no =) plus "version=2" in one group. + let mut buf = Vec::new(); + write_text(&mut buf, "git-filter-client").unwrap(); + write_text(&mut buf, "version=2").unwrap(); + write_flush(&mut buf).unwrap(); + + let mut reader = Cursor::new(buf); + let lines = read_lines_until_flush(&mut reader).unwrap(); + assert_eq!(lines.len(), 2); + assert_eq!(lines[0], "git-filter-client"); + assert_eq!(lines[1], "version=2"); + } +} diff --git a/oxen-rust/src/lib/src/lfs/gitattributes.rs b/oxen-rust/src/lib/src/lfs/gitattributes.rs new file mode 100644 index 000000000..fad1fc12d --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/gitattributes.rs @@ -0,0 +1,149 @@ +use std::path::Path; + +use crate::error::OxenError; + +const GITATTRIBUTES: &str = ".gitattributes"; + +/// The filter/diff/merge attributes appended to each tracked pattern. +const ATTR_SUFFIX: &str = "filter=oxen diff=oxen merge=oxen -text"; + +/// Format a .gitattributes line for a given pattern. +fn format_line(pattern: &str) -> String { + format!("{pattern} {ATTR_SUFFIX}") +} + +/// Add a pattern to `.gitattributes` (idempotent — skips if already present). +pub fn track_pattern(repo_root: &Path, pattern: &str) -> Result<(), OxenError> { + let ga_path = repo_root.join(GITATTRIBUTES); + let line = format_line(pattern); + + let existing = if ga_path.exists() { + std::fs::read_to_string(&ga_path)? + } else { + String::new() + }; + + // Already tracked? + if existing.lines().any(|l| l.trim() == line.trim()) { + return Ok(()); + } + + // Append (ensure trailing newline in existing content). 
+ let mut content = existing; + if !content.is_empty() && !content.ends_with('\n') { + content.push('\n'); + } + content.push_str(&line); + content.push('\n'); + + std::fs::write(&ga_path, content)?; + Ok(()) +} + +/// Remove a pattern from `.gitattributes`. +pub fn untrack_pattern(repo_root: &Path, pattern: &str) -> Result<(), OxenError> { + let ga_path = repo_root.join(GITATTRIBUTES); + if !ga_path.exists() { + return Ok(()); + } + + let line = format_line(pattern); + let existing = std::fs::read_to_string(&ga_path)?; + let filtered: Vec<&str> = existing + .lines() + .filter(|l| l.trim() != line.trim()) + .collect(); + + let mut content = filtered.join("\n"); + if !content.is_empty() { + content.push('\n'); + } + + std::fs::write(&ga_path, content)?; + Ok(()) +} + +/// List all patterns currently tracked with the oxen filter. +pub fn list_tracked_patterns(repo_root: &Path) -> Result, OxenError> { + let ga_path = repo_root.join(GITATTRIBUTES); + if !ga_path.exists() { + return Ok(Vec::new()); + } + + let text = std::fs::read_to_string(&ga_path)?; + let patterns = text + .lines() + .filter(|l| l.contains(ATTR_SUFFIX)) + .filter_map(|l| { + let trimmed = l.trim(); + trimmed + .strip_suffix(ATTR_SUFFIX) + .map(|p| p.trim().to_string()) + }) + .collect(); + Ok(patterns) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_track_creates_gitattributes() { + let tmp = TempDir::new().unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + + let content = std::fs::read_to_string(tmp.path().join(GITATTRIBUTES)).unwrap(); + assert!(content.contains("*.bin filter=oxen diff=oxen merge=oxen -text")); + } + + #[test] + fn test_track_is_idempotent() { + let tmp = TempDir::new().unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + + let content = std::fs::read_to_string(tmp.path().join(GITATTRIBUTES)).unwrap(); + assert_eq!( + content.matches("*.bin").count(), + 1, + "pattern should 
appear only once" + ); + } + + #[test] + fn test_track_multiple_patterns() { + let tmp = TempDir::new().unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + track_pattern(tmp.path(), "datasets/**").unwrap(); + + let patterns = list_tracked_patterns(tmp.path()).unwrap(); + assert_eq!(patterns, vec!["*.bin", "datasets/**"]); + } + + #[test] + fn test_untrack_removes_pattern() { + let tmp = TempDir::new().unwrap(); + track_pattern(tmp.path(), "*.bin").unwrap(); + track_pattern(tmp.path(), "*.pt").unwrap(); + untrack_pattern(tmp.path(), "*.bin").unwrap(); + + let patterns = list_tracked_patterns(tmp.path()).unwrap(); + assert_eq!(patterns, vec!["*.pt"]); + } + + #[test] + fn test_untrack_noop_when_missing() { + let tmp = TempDir::new().unwrap(); + // No error when file doesn't exist. + untrack_pattern(tmp.path(), "*.bin").unwrap(); + } + + #[test] + fn test_list_empty() { + let tmp = TempDir::new().unwrap(); + let patterns = list_tracked_patterns(tmp.path()).unwrap(); + assert!(patterns.is_empty()); + } +} diff --git a/oxen-rust/src/lib/src/lfs/hooks.rs b/oxen-rust/src/lib/src/lfs/hooks.rs new file mode 100644 index 000000000..f2de06b13 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/hooks.rs @@ -0,0 +1,215 @@ +use std::path::Path; + +use crate::error::OxenError; + +/// Marker comment used to identify our hook sections. +const HOOK_MARKER: &str = "# oxen lfs"; + +/// Install pre-push, post-checkout, and post-merge hooks into `.git/hooks/`. +/// +/// `oxen_bin` is the absolute path to the `oxen` executable that the hooks +/// will invoke. This avoids depending on `oxen` being on PATH. +/// +/// Idempotent: checks for existing `oxen lfs` content before appending. +/// Respects existing hook scripts by appending rather than overwriting. 
+pub fn install_hooks(git_dir: &Path, oxen_bin: &Path) -> Result<(), OxenError> { + let hooks_dir = git_dir.join("hooks"); + std::fs::create_dir_all(&hooks_dir)?; + + let bin = shell_quote(oxen_bin); + + install_hook( + &hooks_dir, + "pre-push", + &format!( + r#"{HOOK_MARKER} +if [ ! -x "{bin}" ]; then + echo >&2 "oxen not found at {bin}, skipping LFS pre-push hook" + exit 0 +fi +{bin} lfs push "$@" +"# + ), + )?; + + install_hook( + &hooks_dir, + "post-checkout", + &format!( + r#"{HOOK_MARKER} +[ -x "{bin}" ] || exit 0 +{bin} lfs pull --local +"# + ), + )?; + + install_hook( + &hooks_dir, + "post-merge", + &format!( + r#"{HOOK_MARKER} +[ -x "{bin}" ] || exit 0 +{bin} lfs pull --local +"# + ), + )?; + + Ok(()) +} + +/// Shell-quote a path if it contains spaces, otherwise return as-is. +fn shell_quote(path: &Path) -> String { + let s = path.to_string_lossy(); + if s.contains(' ') { + format!("'{s}'") + } else { + s.into_owned() + } +} + +fn install_hook(hooks_dir: &Path, name: &str, snippet: &str) -> Result<(), OxenError> { + let hook_path = hooks_dir.join(name); + + let existing = if hook_path.exists() { + std::fs::read_to_string(&hook_path)? + } else { + String::new() + }; + + // Already installed? + if existing.contains(HOOK_MARKER) { + return Ok(()); + } + + let mut content = if existing.is_empty() { + "#!/bin/sh\n".to_string() + } else { + let mut s = existing; + if !s.ends_with('\n') { + s.push('\n'); + } + s + }; + + content.push('\n'); + content.push_str(snippet); + + std::fs::write(&hook_path, &content)?; + + // Make executable on Unix. 
+ #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = std::fs::metadata(&hook_path)?.permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(&hook_path, perms)?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + use tempfile::TempDir; + + fn test_bin() -> PathBuf { + PathBuf::from("/usr/local/bin/oxen") + } + + #[test] + fn test_install_hooks_creates_files() { + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + + install_hooks(&git_dir, &test_bin()).unwrap(); + + let hooks_dir = git_dir.join("hooks"); + assert!(hooks_dir.join("pre-push").exists()); + assert!(hooks_dir.join("post-checkout").exists()); + assert!(hooks_dir.join("post-merge").exists()); + + // Check content uses full path. + let pre_push = std::fs::read_to_string(hooks_dir.join("pre-push")).unwrap(); + assert!(pre_push.contains("/usr/local/bin/oxen lfs push")); + assert!(pre_push.starts_with("#!/bin/sh")); + } + + #[test] + fn test_install_hooks_idempotent() { + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + + install_hooks(&git_dir, &test_bin()).unwrap(); + install_hooks(&git_dir, &test_bin()).unwrap(); + + let pre_push = std::fs::read_to_string(git_dir.join("hooks/pre-push")).unwrap(); + assert_eq!( + pre_push.matches("lfs push").count(), + 1, + "should not duplicate hook content" + ); + } + + #[test] + fn test_install_hooks_preserves_existing() { + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + let hooks_dir = git_dir.join("hooks"); + std::fs::create_dir_all(&hooks_dir).unwrap(); + + // Pre-existing hook script. 
+ std::fs::write( + hooks_dir.join("pre-push"), + "#!/bin/sh\necho 'existing hook'\n", + ) + .unwrap(); + + install_hooks(&git_dir, &test_bin()).unwrap(); + + let content = std::fs::read_to_string(hooks_dir.join("pre-push")).unwrap(); + assert!( + content.contains("existing hook"), + "should preserve existing" + ); + assert!( + content.contains("/usr/local/bin/oxen lfs push"), + "should add our hook with full path" + ); + } + + #[test] + fn test_install_hooks_with_spaces_in_path() { + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + + let bin = PathBuf::from("/path with spaces/oxen"); + install_hooks(&git_dir, &bin).unwrap(); + + let pre_push = std::fs::read_to_string(git_dir.join("hooks/pre-push")).unwrap(); + assert!( + pre_push.contains("'/path with spaces/oxen' lfs push"), + "should quote path with spaces" + ); + } + + #[cfg(unix)] + #[test] + fn test_hooks_are_executable() { + use std::os::unix::fs::PermissionsExt; + + let tmp = TempDir::new().unwrap(); + let git_dir = tmp.path().join(".git"); + std::fs::create_dir_all(&git_dir).unwrap(); + + install_hooks(&git_dir, &test_bin()).unwrap(); + + let meta = std::fs::metadata(git_dir.join("hooks/pre-push")).unwrap(); + let mode = meta.permissions().mode(); + assert!(mode & 0o111 != 0, "hook should be executable"); + } +} diff --git a/oxen-rust/src/lib/src/lfs/install.rs b/oxen-rust/src/lib/src/lfs/install.rs new file mode 100644 index 000000000..5e00a1bef --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/install.rs @@ -0,0 +1,116 @@ +use std::path::Path; +use std::process::Command; + +use crate::error::OxenError; + +/// Resolve the canonical absolute path of the running `oxen` binary. 
+pub fn current_exe_path() -> Result { + let exe = std::env::current_exe().map_err(|e| { + OxenError::basic_str(format!("failed to determine current executable: {e}")) + })?; + let canonical = exe.canonicalize().map_err(|e| { + OxenError::basic_str(format!( + "failed to canonicalize executable path {}: {e}", + exe.display() + )) + })?; + canonical.to_str().map(|s| s.to_string()).ok_or_else(|| { + OxenError::basic_str(format!( + "executable path is not valid UTF-8: {}", + canonical.display() + )) + }) +} + +/// Configure Git's global filter driver so that every repository +/// using `filter=oxen` will invoke our clean/smudge process. +/// +/// `oxen_bin` is the absolute path to the `oxen` executable. +/// +/// Sets in `~/.gitconfig`: +/// ```text +/// [filter "oxen"] +/// process = /full/path/to/oxen lfs filter-process +/// required = true +/// clean = /full/path/to/oxen lfs clean -- %f +/// smudge = /full/path/to/oxen lfs smudge -- %f +/// ``` +pub fn install_global_filter(oxen_bin: &Path) -> Result<(), OxenError> { + let bin = shell_quote(oxen_bin); + git_config_global("filter.oxen.process", &format!("{bin} lfs filter-process"))?; + git_config_global("filter.oxen.required", "true")?; + git_config_global("filter.oxen.clean", &format!("{bin} lfs clean -- %f"))?; + git_config_global("filter.oxen.smudge", &format!("{bin} lfs smudge -- %f"))?; + Ok(()) +} + +/// Shell-quote a path if it contains spaces, otherwise return as-is. +fn shell_quote(path: &Path) -> String { + let s = path.to_string_lossy(); + if s.contains(' ') { + format!("'{s}'") + } else { + s.into_owned() + } +} + +/// Remove the global filter driver configuration. +pub fn uninstall_global_filter() -> Result<(), OxenError> { + // --remove-section fails if the section doesn't exist, so ignore errors. 
+ let _ = Command::new("git") + .args(["config", "--global", "--remove-section", "filter.oxen"]) + .output(); + Ok(()) +} + +fn git_config_global(key: &str, value: &str) -> Result<(), OxenError> { + let output = Command::new("git") + .args(["config", "--global", key, value]) + .output() + .map_err(|e| OxenError::basic_str(format!("failed to run git config: {e}")))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(OxenError::basic_str(format!( + "git config --global {key} failed: {stderr}" + ))); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + // Note: We don't run install/uninstall in tests to avoid modifying the + // developer's actual ~/.gitconfig. Integration tests with isolated HOME + // can cover this. + + use super::*; + + #[test] + fn test_install_and_uninstall_do_not_panic() { + // Smoke test: just verify the functions can be called without panic. + // Actual git config changes are tested in integration tests. + let exe = std::path::PathBuf::from("/usr/local/bin/oxen"); + let _ = install_global_filter(&exe); + let _ = uninstall_global_filter(); + } + + #[test] + fn test_current_exe_path_returns_string() { + // Should succeed in any test environment. + let path = current_exe_path().unwrap(); + assert!(!path.is_empty()); + } + + #[test] + fn test_shell_quote_no_spaces() { + let p = std::path::Path::new("/usr/local/bin/oxen"); + assert_eq!(shell_quote(p), "/usr/local/bin/oxen"); + } + + #[test] + fn test_shell_quote_with_spaces() { + let p = std::path::Path::new("/path with spaces/oxen"); + assert_eq!(shell_quote(p), "'/path with spaces/oxen'"); + } +} diff --git a/oxen-rust/src/lib/src/lfs/pointer.rs b/oxen-rust/src/lib/src/lfs/pointer.rs new file mode 100644 index 000000000..63a7c0930 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/pointer.rs @@ -0,0 +1,161 @@ +use std::fmt; + +/// First line of every Oxen LFS pointer file. 
+pub const POINTER_VERSION_LINE: &str = "version https://oxen.ai/spec/v1"; + +/// Hash algorithm identifier used in pointer files. +pub const HASH_ALGO: &str = "xxh3"; + +/// Pointer files should never exceed this size in bytes. +pub const MAX_POINTER_SIZE: usize = 200; + +/// Represents an Oxen LFS pointer — a small stand-in stored in Git +/// that references content kept in the Oxen version store. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PointerFile { + /// 32-char lowercase hex hash (no algorithm prefix). + pub oid: String, + /// Size in bytes of the original content. + pub size: u64, +} + +impl PointerFile { + pub fn new(hash: &str, size: u64) -> Self { + Self { + oid: hash.to_string(), + size, + } + } + + /// Serialize to the canonical pointer text (UTF-8, newline-terminated). + pub fn encode(&self) -> Vec { + self.to_string().into_bytes() + } + + /// Try to parse a byte slice as a pointer file. + /// Returns `None` when the data is not a valid pointer. + pub fn decode(data: &[u8]) -> Option { + if data.len() > MAX_POINTER_SIZE { + return None; + } + + let text = std::str::from_utf8(data).ok()?; + let mut lines = text.lines(); + + // Line 1: version + let version_line = lines.next()?; + if version_line != POINTER_VERSION_LINE { + return None; + } + + // Line 2: oid : + let oid_line = lines.next()?; + let oid_value = oid_line.strip_prefix("oid ")?; + let hash = oid_value.strip_prefix(&format!("{HASH_ALGO}:"))?; + + // Validate hex string (should be 32 chars for xxh3_128) + if hash.len() != 32 || !hash.chars().all(|c| c.is_ascii_hexdigit()) { + return None; + } + + // Line 3: size + let size_line = lines.next()?; + let size_value = size_line.strip_prefix("size ")?; + let size: u64 = size_value.parse().ok()?; + + // No extra lines allowed (other than a trailing newline which .lines() skips) + if lines.next().is_some() { + return None; + } + + Some(Self { + oid: hash.to_string(), + size, + }) + } + + /// Quick check: is this byte slice a valid Oxen LFS 
pointer? + pub fn is_pointer(data: &[u8]) -> bool { + if data.len() > MAX_POINTER_SIZE { + return false; + } + // Fast path: check the version prefix before full parse. + data.starts_with(POINTER_VERSION_LINE.as_bytes()) && Self::decode(data).is_some() + } +} + +impl fmt::Display for PointerFile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}\noid {}:{}\nsize {}\n", + POINTER_VERSION_LINE, HASH_ALGO, self.oid, self.size, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pointer_roundtrip() { + let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 1234567890); + let encoded = ptr.encode(); + let decoded = PointerFile::decode(&encoded).expect("should decode"); + assert_eq!(ptr, decoded); + } + + #[test] + fn test_pointer_format() { + let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 42); + let text = String::from_utf8(ptr.encode()).unwrap(); + assert_eq!( + text, + "version https://oxen.ai/spec/v1\noid xxh3:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8\nsize 42\n" + ); + } + + #[test] + fn test_is_pointer_true() { + let ptr = PointerFile::new("a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8", 100); + assert!(PointerFile::is_pointer(&ptr.encode())); + } + + #[test] + fn test_is_pointer_false_random_data() { + assert!(!PointerFile::is_pointer( + b"hello world, this is not a pointer" + )); + } + + #[test] + fn test_is_pointer_false_too_large() { + let big = vec![b'x'; MAX_POINTER_SIZE + 1]; + assert!(!PointerFile::is_pointer(&big)); + } + + #[test] + fn test_decode_rejects_bad_hash_length() { + let bad = b"version https://oxen.ai/spec/v1\noid xxh3:abc123\nsize 10\n"; + assert!(PointerFile::decode(bad).is_none()); + } + + #[test] + fn test_decode_rejects_extra_lines() { + let bad = b"version https://oxen.ai/spec/v1\noid xxh3:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8\nsize 10\nextra\n"; + assert!(PointerFile::decode(bad).is_none()); + } + + #[test] + fn test_decode_rejects_wrong_version() { + let bad = b"version 
https://git-lfs.github.com/spec/v1\noid xxh3:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8\nsize 10\n"; + assert!(PointerFile::decode(bad).is_none()); + } + + #[test] + fn test_decode_rejects_wrong_algorithm() { + let bad = b"version https://oxen.ai/spec/v1\noid sha256:a1b2c3d4e5f6a7b8a1b2c3d4e5f6a7b8\nsize 10\n"; + assert!(PointerFile::decode(bad).is_none()); + } +} diff --git a/oxen-rust/src/lib/src/lfs/status.rs b/oxen-rust/src/lib/src/lfs/status.rs new file mode 100644 index 000000000..6ab8f7abe --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/status.rs @@ -0,0 +1,121 @@ +use std::path::{Path, PathBuf}; + +use crate::error::OxenError; +use crate::lfs::gitattributes; +use crate::lfs::pointer::PointerFile; +use crate::storage::version_store::VersionStore; +use crate::storage::LocalVersionStore; + +/// Status information for a single LFS-tracked file. +#[derive(Debug)] +pub struct LfsFileStatus { + /// Path relative to repo root. + pub path: PathBuf, + /// The parsed pointer. + pub pointer: PointerFile, + /// Whether the actual content is available in the local version store. + pub local: bool, +} + +/// Walk the working tree, find pointer files that match tracked patterns, +/// and report their status. +pub async fn get_status( + repo_root: &Path, + versions_dir: &Path, +) -> Result, OxenError> { + let patterns = gitattributes::list_tracked_patterns(repo_root)?; + if patterns.is_empty() { + return Ok(Vec::new()); + } + + let store = LocalVersionStore::new(versions_dir); + let mut results = Vec::new(); + + // Build glob matchers. + let matchers: Vec = patterns + .iter() + .filter_map(|p| glob::Pattern::new(p).ok()) + .collect(); + + // Walk the working tree. + for entry in walkdir::WalkDir::new(repo_root) + .into_iter() + .filter_entry(|e| { + // Skip .git and .oxen directories. 
+ let name = e.file_name().to_string_lossy(); + name != ".git" && name != ".oxen" + }) + { + let entry = match entry { + Ok(e) => e, + Err(_) => continue, + }; + + if !entry.file_type().is_file() { + continue; + } + + let rel_path = match entry.path().strip_prefix(repo_root) { + Ok(p) => p, + Err(_) => continue, + }; + + let rel_str = rel_path.to_string_lossy(); + + // Check if the file matches any tracked pattern. + let matched = matchers.iter().any(|m| m.matches(&rel_str)); + if !matched { + continue; + } + + // Read the file and check if it's a pointer. + let data = match std::fs::read(entry.path()) { + Ok(d) => d, + Err(_) => continue, + }; + + if let Some(pointer) = PointerFile::decode(&data) { + let local = store.version_exists(&pointer.oid).await.unwrap_or(false); + results.push(LfsFileStatus { + path: rel_path.to_path_buf(), + pointer, + local, + }); + } + } + + Ok(results) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lfs::filter; + use crate::lfs::gitattributes; + use tempfile::TempDir; + + #[tokio::test] + async fn test_status_finds_pointer_files() { + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let oxen_dir = repo_root.join(".oxen"); + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).unwrap(); + + // Track *.bin + gitattributes::track_pattern(repo_root, "*.bin").unwrap(); + + // Create a pointer file by running clean. + let content = b"binary content here"; + let pointer_bytes = filter::clean(&versions_dir, content).await.unwrap(); + std::fs::write(repo_root.join("model.bin"), &pointer_bytes).unwrap(); + + // Create a non-matching file. 
+ std::fs::write(repo_root.join("readme.txt"), b"hello").unwrap(); + + let statuses = get_status(repo_root, &versions_dir).await.unwrap(); + assert_eq!(statuses.len(), 1); + assert_eq!(statuses[0].path, PathBuf::from("model.bin")); + assert!(statuses[0].local); + } +} diff --git a/oxen-rust/src/lib/src/lfs/sync.rs b/oxen-rust/src/lib/src/lfs/sync.rs new file mode 100644 index 000000000..957298997 --- /dev/null +++ b/oxen-rust/src/lib/src/lfs/sync.rs @@ -0,0 +1,410 @@ +use std::path::{Path, PathBuf}; +use std::process::Command; + +use crate::api; +use crate::config::UserConfig; +use crate::constants::DEFAULT_BRANCH_NAME; +use crate::error::OxenError; +use crate::lfs::config::LfsConfig; +use crate::lfs::filter; +use crate::lfs::pointer::PointerFile; +use crate::lfs::status; +use crate::model::NewCommitBody; +use crate::storage::version_store::VersionStore; +use crate::storage::LocalVersionStore; + +/// Push large file versions to the configured Oxen remote. +/// +/// Called by the pre-push hook or the `oxen lfs push` CLI command. +/// `hook_args` receives the hook arguments (remote name and URL) passed +/// by Git; logged for debugging. +pub async fn push_to_remote( + repo_root: &Path, + oxen_dir: &Path, + hook_args: &[String], +) -> Result<(), OxenError> { + log::debug!("oxen lfs push: hook_args={hook_args:?}"); + + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).ok(); + + let lfs_config = LfsConfig::load(oxen_dir)?; + let remote_repo = match lfs_config.resolve_remote().await? 
{ + Some(r) => r, + None => { + log::info!("oxen lfs push: no remote configured, skipping"); + return Ok(()); + } + }; + + let statuses = status::get_status(repo_root, &versions_dir).await?; + let to_push: Vec<_> = statuses.iter().filter(|s| s.local).collect(); + if to_push.is_empty() { + log::info!("oxen lfs push: nothing to push"); + return Ok(()); + } + + let store = LocalVersionStore::new(&versions_dir); + + // Build a temporary staging directory mirroring the files' real repo-relative paths. + // `add_files` expects absolute paths rooted under a common base directory. + let staging_dir = tempfile::tempdir().map_err(|e| { + OxenError::basic_str(format!("oxen lfs push: failed to create staging dir: {e}")) + })?; + + let mut staged_paths: Vec = Vec::new(); + for file_status in &to_push { + let src = store.get_version_path(&file_status.pointer.oid)?; + let dest = staging_dir.path().join(&file_status.path); + if let Some(parent) = dest.parent() { + std::fs::create_dir_all(parent)?; + } + // Prefer hard-link to avoid copying; fall back to copy. + if std::fs::hard_link(&src, &dest).is_err() { + std::fs::copy(&src, &dest)?; + } + staged_paths.push(dest); + } + + let workspace_id = uuid::Uuid::new_v4().to_string(); + + // Create workspace → upload files → commit. On any error, attempt cleanup. + let result = push_workspace( + &remote_repo, + &workspace_id, + staging_dir.path(), + &staged_paths, + ) + .await; + + if let Err(ref e) = result { + log::warn!("oxen lfs push: push failed ({e}), cleaning up workspace"); + if let Err(del_err) = api::client::workspaces::delete(&remote_repo, &workspace_id).await { + log::warn!("oxen lfs push: workspace cleanup failed: {del_err}"); + } + } + + result?; + + println!( + "oxen lfs push: uploaded {} file(s) to {}", + to_push.len(), + remote_repo.url() + ); + Ok(()) +} + +/// Inner helper: create workspace, add files, commit. 
+async fn push_workspace( + remote_repo: &crate::model::RemoteRepository, + workspace_id: &str, + staging_dir: &Path, + staged_paths: &[PathBuf], +) -> Result<(), OxenError> { + api::client::workspaces::create(remote_repo, DEFAULT_BRANCH_NAME, workspace_id).await?; + + api::client::workspaces::files::add_files( + remote_repo, + workspace_id, + staging_dir, + staged_paths.to_vec(), + ) + .await?; + + let user_config = UserConfig::get()?; + let body = NewCommitBody { + message: "oxen lfs push: sync large files".to_string(), + author: user_config.name, + email: user_config.email, + }; + + api::client::workspaces::commits::commit(remote_repo, DEFAULT_BRANCH_NAME, workspace_id, &body) + .await?; + + Ok(()) +} + +/// Pull large file content and restore pointer files in the working tree. +/// +/// When `local_only` is true, only restores from the local `.oxen/versions/` +/// store (no network). This is used by post-checkout and post-merge hooks. +pub async fn pull_from_remote( + repo_root: &Path, + oxen_dir: &Path, + local_only: bool, +) -> Result<(), OxenError> { + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).ok(); + + let statuses = status::get_status(repo_root, &versions_dir).await?; + + let store = LocalVersionStore::new(&versions_dir); + let lfs_config = LfsConfig::load(oxen_dir)?; + let mut restored_paths: Vec = Vec::new(); + let mut need_remote: Vec<&status::LfsFileStatus> = Vec::new(); + + for file_status in &statuses { + if file_status.local { + // Content is available locally — restore the actual file. + let dest = repo_root.join(&file_status.path); + store + .copy_version_to_path(&file_status.pointer.oid, &dest) + .await?; + restored_paths.push(file_status.path.clone()); + } else { + // Try smudge (which checks origin for local clones). 
+ let pointer_data = file_status.pointer.encode(); + let result = + filter::smudge(&versions_dir, repo_root, &lfs_config, &pointer_data).await?; + if !PointerFile::is_pointer(&result) { + // Smudge resolved it — write to working tree. + let dest = repo_root.join(&file_status.path); + std::fs::write(&dest, &result)?; + restored_paths.push(file_status.path.clone()); + } else if !local_only { + need_remote.push(file_status); + } + } + } + + // Batch-download any remaining files from the Oxen remote. + if !need_remote.is_empty() { + if let Some(remote_repo) = lfs_config.resolve_remote().await? { + let hashes: Vec = need_remote.iter().map(|s| s.pointer.oid.clone()).collect(); + api::client::versions::download_versions_to_store(&remote_repo, &hashes, &store) + .await?; + + // Restore the now-downloaded files to the working tree. + for file_status in &need_remote { + let dest = repo_root.join(&file_status.path); + store + .copy_version_to_path(&file_status.pointer.oid, &dest) + .await?; + restored_paths.push(file_status.path.clone()); + } + } else { + for s in &need_remote { + log::warn!( + "oxen lfs pull: {} not available locally and no remote configured", + s.path.display() + ); + } + } + } + + if !restored_paths.is_empty() { + // Re-add restored files so Git's index stat cache reflects the new + // on-disk content. The clean filter produces the same pointer blob, + // so no actual index change occurs — only the stat cache is updated. + git_add(repo_root, &restored_paths)?; + println!("oxen lfs pull: restored {} file(s)", restored_paths.len()); + } + + Ok(()) +} + +/// Force-synchronize ALL tracked pointer files in the working tree. +/// +/// For each pointer file that matches a tracked pattern: +/// 1. Try the local `.oxen/versions/` store. +/// 2. Try the origin's `.oxen/versions/` (for local clones). +/// 3. Try the configured Oxen remote. +/// 4. If any file still cannot be resolved, return an error listing all failures. 
+/// +/// This is meant to be run explicitly by the user to guarantee every +/// pointer is replaced with actual content. +pub async fn fetch_all(repo_root: &Path, oxen_dir: &Path) -> Result<(), OxenError> { + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).ok(); + + let lfs_config = LfsConfig::load(oxen_dir)?; + let statuses = status::get_status(repo_root, &versions_dir).await?; + + if statuses.is_empty() { + println!("oxen lfs fetch-all: no tracked pointer files found"); + return Ok(()); + } + + let store = LocalVersionStore::new(&versions_dir); + let mut restored_paths: Vec = Vec::new(); + let mut unresolved: Vec<&status::LfsFileStatus> = Vec::new(); + + for file_status in &statuses { + let dest = repo_root.join(&file_status.path); + + if file_status.local { + // Available in local store — restore directly. + store + .copy_version_to_path(&file_status.pointer.oid, &dest) + .await?; + restored_paths.push(file_status.path.clone()); + println!(" restored: {}", file_status.path.display()); + continue; + } + + // Try smudge (which checks origin for local clones). + let pointer_data = file_status.pointer.encode(); + let result = filter::smudge(&versions_dir, repo_root, &lfs_config, &pointer_data).await?; + + if PointerFile::is_pointer(&result) { + unresolved.push(file_status); + } else { + std::fs::write(&dest, &result)?; + restored_paths.push(file_status.path.clone()); + println!(" restored: {}", file_status.path.display()); + } + } + + // Try the configured Oxen remote for any remaining unresolved pointers. + if !unresolved.is_empty() { + if let Some(remote_repo) = lfs_config.resolve_remote().await? 
{ + let hashes: Vec = unresolved.iter().map(|s| s.pointer.oid.clone()).collect(); + api::client::versions::download_versions_to_store(&remote_repo, &hashes, &store) + .await?; + + for file_status in &unresolved { + let dest = repo_root.join(&file_status.path); + store + .copy_version_to_path(&file_status.pointer.oid, &dest) + .await?; + restored_paths.push(file_status.path.clone()); + println!(" restored (remote): {}", file_status.path.display()); + } + unresolved.clear(); + } + } + + if !unresolved.is_empty() { + let failures: Vec = unresolved + .iter() + .map(|s| format!("{} (oid: {})", s.path.display(), s.pointer.oid)) + .collect(); + let msg = format!( + "oxen lfs fetch-all: {} file(s) could not be resolved:\n {}", + failures.len(), + failures.join("\n ") + ); + return Err(OxenError::basic_str(msg)); + } + + // Re-add restored files so Git's index stat cache reflects the new + // on-disk content. The clean filter produces the same pointer blob, + // so no actual index change occurs — only the stat cache is updated. + git_add(repo_root, &restored_paths)?; + + println!( + "oxen lfs fetch-all: all {} file(s) restored successfully", + restored_paths.len() + ); + Ok(()) +} + +/// Run `git add` on a list of paths so Git's index stat cache is updated. +/// +/// After we replace a pointer file with real content, the on-disk size and +/// mtime change. Without re-adding, `git status` shows the files as modified +/// even though the clean filter produces the identical blob. Re-adding lets +/// Git refresh its stat cache. 
+fn git_add(repo_root: &Path, paths: &[PathBuf]) -> Result<(), OxenError> { + if paths.is_empty() { + return Ok(()); + } + + let path_args: Vec<&str> = paths.iter().filter_map(|p| p.to_str()).collect(); + if path_args.is_empty() { + return Ok(()); + } + + let mut cmd = Command::new("git"); + cmd.arg("add").args(&path_args).current_dir(repo_root); + + let output = cmd + .output() + .map_err(|e| OxenError::basic_str(format!("oxen lfs: failed to spawn git add: {e}")))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + log::warn!("oxen lfs: git add exited with {}: {stderr}", output.status); + } + + Ok(()) +} + +/// Scan working tree for pointer files and return the list of OIDs +/// that need to be pushed. +pub async fn list_pushable_oids( + repo_root: &Path, + oxen_dir: &Path, +) -> Result, OxenError> { + let versions_dir = oxen_dir.join("versions"); + let statuses = status::get_status(repo_root, &versions_dir).await?; + Ok(statuses + .into_iter() + .filter(|s| s.local) + .map(|s| s.pointer) + .collect()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lfs::filter; + use crate::lfs::gitattributes; + use tempfile::TempDir; + + #[tokio::test] + async fn test_push_no_remote_configured() { + // With no remote_url in lfs.toml, push should succeed silently. + let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let oxen_dir = repo_root.join(".oxen"); + std::fs::create_dir_all(&oxen_dir).unwrap(); + + // Save config with no remote. + let cfg = LfsConfig::default(); + cfg.save(&oxen_dir).unwrap(); + + let result = push_to_remote(repo_root, &oxen_dir, &[]).await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_pull_local_only_no_network() { + // local_only pull should not attempt network calls; it should + // restore files that are in the local store and skip the rest. 
+ let tmp = TempDir::new().unwrap(); + let repo_root = tmp.path(); + let oxen_dir = repo_root.join(".oxen"); + let versions_dir = oxen_dir.join("versions"); + std::fs::create_dir_all(&versions_dir).unwrap(); + + // Track *.bin and create a pointer file whose content IS local. + gitattributes::track_pattern(repo_root, "*.bin").unwrap(); + let content = b"local binary content"; + let pointer_bytes = filter::clean(&versions_dir, content).await.unwrap(); + std::fs::write(repo_root.join("data.bin"), &pointer_bytes).unwrap(); + + // Save default config (no remote). + LfsConfig::default().save(&oxen_dir).unwrap(); + + let result = pull_from_remote(repo_root, &oxen_dir, true).await; + assert!(result.is_ok()); + + // The file should be restored to real content. + let on_disk = std::fs::read(repo_root.join("data.bin")).unwrap(); + assert_eq!(on_disk, content); + } + + #[tokio::test] + async fn test_git_add_returns_result() { + // git_add on an empty list should be Ok. + let tmp = TempDir::new().unwrap(); + let result = git_add(tmp.path(), &[]); + assert!(result.is_ok()); + + // git_add on a path in a non-git dir should still return Ok + // (git add will fail but we only warn on non-zero exit). + let result = git_add(tmp.path(), &[PathBuf::from("nonexistent.txt")]); + assert!(result.is_ok()); + } +} diff --git a/oxen-rust/src/lib/src/lib.rs b/oxen-rust/src/lib/src/lib.rs index 4e2a1aff8..3d7392b07 100644 --- a/oxen-rust/src/lib/src/lib.rs +++ b/oxen-rust/src/lib/src/lib.rs @@ -70,6 +70,7 @@ pub mod constants; pub mod core; pub mod error; pub mod io; +pub mod lfs; pub mod migrations; pub mod model; pub mod namespaces;