Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 137 additions & 8 deletions crates/weaver_common/src/vdir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,16 @@ const TAR_GZ_EXT: &str = ".tar.gz";
/// The extension for a zip archive.
const ZIP_EXT: &str = ".zip";

/// Reports whether `s` has the shape of a complete hexadecimal commit hash.
///
/// A string qualifies when its byte length is exactly 40 (SHA-1) or exactly
/// 64 (SHA-256) and every byte is an ASCII hex digit (`0-9`, `a-f`, `A-F`).
/// Abbreviated hashes are rejected on purpose, so that short branch or tag
/// names that happen to be valid hex (e.g. `deadbeef`) are still treated as
/// symbolic refs.
fn is_commit_sha(s: &str) -> bool {
    /// Number of hex characters in a full SHA-1 object id.
    const SHA1_HEX_LEN: usize = 40;
    /// Number of hex characters in a full SHA-256 object id.
    const SHA256_HEX_LEN: usize = 64;

    let has_full_length = matches!(s.len(), SHA1_HEX_LEN | SHA256_HEX_LEN);
    has_full_length && s.as_bytes().iter().all(u8::is_ascii_hexdigit)
}

/// Regex to parse a virtual directory path string.
///
/// Supports the following general format: `source[@refspec][\[sub_folder]]`
Expand Down Expand Up @@ -649,32 +659,47 @@ impl VirtualDirectory {
message: e.to_string(),
})?;

// Determine whether the refspec is a commit SHA. `with_ref_name` only
// accepts symbolic refs (branches/tags) and panics on raw object IDs.
let is_sha = refspec.as_ref().is_some_and(|r| is_commit_sha(r));

let mut fetch = if refspec.is_none() {
prepare.with_shallow(Shallow::DepthAtRemote(
NonZeroU32::new(1).expect("1 is not zero"),
))
} else {
prepare
}
.with_ref_name(refspec.as_ref())
// Only pass the refspec to `with_ref_name` when it is a symbolic ref.
// Commit SHAs are handled via `checkout_sha` after fetching.
.with_ref_name(if is_sha { None } else { refspec.as_ref() })
.map_err(|e| GitError {
repo_url: url.to_owned(),
message: e.to_string(),
})?;

let (mut prepare, _outcome) = fetch
let (mut checkout, _outcome) = fetch
.fetch_then_checkout(progress::Discard, &AtomicBool::new(false))
.map_err(|e| GitError {
repo_url: url.to_owned(),
message: e.to_string(),
})?;

let (_repo, _outcome) = prepare
.main_worktree(progress::Discard, &AtomicBool::new(false))
.map_err(|e| GitError {
repo_url: url.to_owned(),
message: e.to_string(),
})?;
if is_sha {
// For commit SHAs we skip `main_worktree()` (which would checkout
// the default branch) and instead checkout the requested commit
// directly using gix APIs.
let sha = refspec.as_ref().expect("is_sha implies Some");
let repo = checkout.persist();
Self::checkout_sha(&repo, sha, url)?;
} else {
let _repo = checkout
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the result unused? Please add a comment explaining the rationale behind this call: is it executed only for its side effect? If so, document what we expect to happen and why we are not returning a value.

.main_worktree(progress::Discard, &AtomicBool::new(false))
.map_err(|e| GitError {
repo_url: url.to_owned(),
message: e.to_string(),
})?;
}

// Determines the final path to the repo taking into account the sub_folder.
let path = if let Some(sub_folder) = sub_folder {
Expand All @@ -701,6 +726,73 @@ impl VirtualDirectory {
})
}

/// Populate the worktree of a cloned repository from a specific commit SHA.
///
/// The steps are, in order:
/// 1. look up the repository's working directory,
/// 2. parse `sha` as a hex object id,
/// 3. find that object in `repo` and peel it to its tree,
/// 4. build an index from the tree,
/// 5. check the index out into the working directory, and
/// 6. write the index.
///
/// This is used instead of the `main_worktree()` path, which can only
/// checkout HEAD or a symbolic ref.
///
/// # Errors
///
/// Each step that fails yields a `GitError` whose `repo_url` is `url` and
/// whose `message` names the failing step.
fn checkout_sha(repo: &gix::Repository, sha: &str, url: &str) -> Result<(), Error> {
    // All failures in this function are reported against the same URL; only
    // the message differs.
    let git_err = |message: String| GitError {
        repo_url: url.to_owned(),
        message,
    };

    let workdir = repo
        .workdir()
        .ok_or_else(|| git_err("repository has no worktree".to_owned()))?;

    let commit_id = gix::ObjectId::from_hex(sha.as_bytes())
        .map_err(|e| git_err(format!("invalid commit SHA {sha}: {e}")))?;

    let tree_id = repo
        .find_object(commit_id)
        .map_err(|e| git_err(format!("commit {sha} not found: {e}")))?
        .peel_to_tree()
        .map_err(|e| git_err(format!("failed to peel {sha} to tree: {e}")))?
        .id;

    let mut worktree_index = repo
        .index_from_tree(&tree_id)
        .map_err(|e| git_err(format!("failed to build index from tree: {e}")))?;

    let mut checkout_opts = repo
        .checkout_options(gix::worktree::stack::state::attributes::Source::IdMapping)
        .map_err(|e| git_err(format!("failed to get checkout options: {e}")))?;
    checkout_opts.destination_is_initially_empty = true;

    // The checkout routine takes its own handle to the object database.
    let objects = repo
        .objects
        .clone()
        .into_arc()
        .map_err(|e| git_err(format!("failed to create object store handle: {e}")))?;

    // Progress reporting is discarded and the interrupt flag is never set.
    let _outcome = gix::worktree::state::checkout(
        &mut worktree_index,
        workdir,
        objects,
        &progress::Discard,
        &progress::Discard,
        &AtomicBool::new(false),
        checkout_opts,
    )
    .map_err(|e| git_err(format!("checkout failed: {e}")))?;

    worktree_index
        .write(Default::default())
        .map_err(|e| git_err(format!("failed to write index: {e}")))?;

    Ok(())
}

/// Create a new `VirtualDirectory` from a local archive.
/// The archive can be in `.tar.gz` or `.zip` format.
/// The sub_folder is used to filter the entries inside the archive to unpack.
Expand Down Expand Up @@ -1629,4 +1721,41 @@ mod tests {
let err = find_asset_url(&no_url, "manifest.yaml", "v1", "orig").expect_err("missing url");
assert!(matches!(&err, RemoteFileDownloadFailed { error, .. } if error.contains("'url'")));
}

#[test]
fn test_is_commit_sha() {
    use super::is_commit_sha;

    // Inputs that must be recognised as full-length commit hashes.
    let accepted = [
        // Valid SHA-1 (40 hex chars)
        "d84341cf20a1fef1a833ef44d318c41a770e6e64",
        "0000000000000000000000000000000000000000",
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        // Valid SHA-256 (64 hex chars)
        "d84341cf20a1fef1a833ef44d318c41a770e6e64d84341cf20a1fef1a833ef44",
    ];
    for candidate in accepted {
        assert!(
            is_commit_sha(candidate),
            "expected {candidate:?} to be recognised as a commit SHA"
        );
    }

    // Inputs that must NOT be recognised as commit hashes.
    let rejected = [
        // Too short / too long
        "d84341cf",
        "d84341cf20a1fef1a833ef44d318c41a770e6e6", // 39 chars
        "d84341cf20a1fef1a833ef44d318c41a770e6e640", // 41 chars
        // Non-hex characters
        "g84341cf20a1fef1a833ef44d318c41a770e6e64",
        // Symbolic refs
        "main",
        "v1.0.0",
        "refs/heads/main",
    ];
    for candidate in rejected {
        assert!(
            !is_commit_sha(candidate),
            "expected {candidate:?} to be rejected as a commit SHA"
        );
    }
}
}