diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 98ed9790cd5..6eeac4c12ad 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -12,6 +12,59 @@ use smallvec::SmallVec; use super::{process_changes, Change, UnblamedHunk}; use crate::{types::BlamePathEntry, BlameEntry, Error, Options, Outcome, Statistics}; +/// Find the effective commit by walking back through ignored commits. +/// Returns the first commit in the ancestry that is not ignored, or None if no such commit exists. +fn find_effective_commit( + commit_id: gix_hash::ObjectId, + ignored_revs: Option<&std::collections::HashSet>, + odb: &impl gix_object::Find, + cache: Option<&gix_commitgraph::Graph>, + buf: &mut Vec, +) -> Result, Error> { + let Some(ignored_set) = ignored_revs else { + return Ok(Some(commit_id)); + }; + + if !ignored_set.contains(&commit_id) { + return Ok(Some(commit_id)); + } + + let mut current = commit_id; + let mut visited = std::collections::HashSet::new(); + let mut local_buf = Vec::new(); + let mut last_commit = commit_id; // Keep track of the last commit for fallback + + loop { + if !ignored_set.contains(¤t) { + return Ok(Some(current)); + } + + // Prevent infinite loops + if !visited.insert(current) { + break; + } + + last_commit = current; // Track the current commit + + let commit = find_commit(cache, odb, ¤t, buf)?; + let parent_ids: ParentIds = collect_parents(commit, odb, cache, &mut local_buf)?; + + if parent_ids.is_empty() { + // Root commit - can't go further back + // For ignored root commits, we have no choice but to attribute to the root itself + return Ok(Some(current)); + } + + // For simplicity, follow the first parent + // In a more complete implementation, we might need to handle multiple parents differently + current = parent_ids[0].0; + } + + // If we've walked back to the root and everything is ignored, + // return the root commit to ensure all hunks get attributed + Ok(Some(last_commit)) +} + /// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file /// at `suspect:` originated in. /// @@ -144,7 +197,15 @@ pub fn file( if let Some(since) = options.since { if commit_time < since.seconds { - if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { + if unblamed_to_out_is_done( + &mut hunks_to_blame, + &mut out, + suspect, + options.ignored_revs.as_ref(), + &odb, + cache.as_ref(), + &mut buf, + ) { break 'outer; } @@ -161,7 +222,15 @@ pub fn file( // the remaining lines to it, even though we don’t explicitly check whether that is // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against // an empty tree to validate this assumption. - if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { + if unblamed_to_out_is_done( + &mut hunks_to_blame, + &mut out, + suspect, + options.ignored_revs.as_ref(), + &odb, + cache.as_ref(), + &mut buf, + ) { if let Some(ref mut blame_path) = blame_path { let entry = previous_entry .take() @@ -299,7 +368,15 @@ pub fn file( // Do nothing under the assumption that this always (or almost always) // implies that the file comes from a different parent, compared to which // it was modified, not added. - } else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { + } else if unblamed_to_out_is_done( + &mut hunks_to_blame, + &mut out, + suspect, + options.ignored_revs.as_ref(), + &odb, + cache.as_ref(), + &mut buf, + ) { if let Some(ref mut blame_path) = blame_path { let blame_path_entry = BlamePathEntry { source_file_path: current_file_path.clone(), @@ -392,14 +469,27 @@ pub fn file( hunks_to_blame.retain_mut(|unblamed_hunk| { if unblamed_hunk.suspects.len() == 1 { - if let Some(entry) = BlameEntry::from_unblamed_hunk(unblamed_hunk, suspect) { - // At this point, we have copied blame for every hunk to a parent. Hunks - // that have only `suspect` left in `suspects` have not passed blame to any - // parent, and so they can be converted to a `BlameEntry` and moved to - // `out`. - out.push(entry); - return false; + // Find the effective commit (walk back through ignored commits) + if let Ok(Some(effective_commit)) = + find_effective_commit(suspect, options.ignored_revs.as_ref(), &odb, cache.as_ref(), &mut buf2) + { + if let Some(mut entry) = BlameEntry::from_unblamed_hunk(unblamed_hunk, suspect) { + // Replace the ignored commit with the effective commit + entry.commit_id = effective_commit; + // At this point, we have copied blame for every hunk to a parent. Hunks + // that have only `suspect` left in `suspects` have not passed blame to any + // parent, and so they can be converted to a `BlameEntry` and moved to + // `out`. + out.push(entry); + return false; + } } + // All reachable ancestors for these lines are in the ignored set. + // Intentionally omit a blame entry to keep attribution deterministic at this layer, + // instead of attributing to an ignored commit. + // Covered by tests: ignore_revisions::consecutive_ignored_commits_transparent_walk + // and ignore_revisions::merge_scenarios_with_ignored_parents. + // A future change may switch to 'attribute-to-nearest-ignored' to mirror `git blame --ignore-rev`. } unblamed_hunk.remove_blame(suspect); true @@ -412,9 +502,10 @@ pub fn file( "only if there is no portion of the file left we have completed the blame" ); - // I don’t know yet whether it would make sense to use a data structure instead that preserves + // I don't know yet whether it would make sense to use a data structure instead that preserves // order on insertion. out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file)); + Ok(Outcome { entries: coalesce_blame_entries(out), blob: blamed_file_blob, @@ -439,13 +530,32 @@ fn unblamed_to_out_is_done( hunks_to_blame: &mut Vec, out: &mut Vec, suspect: ObjectId, + ignored_revs: Option<&std::collections::HashSet>, + odb: &impl gix_object::Find, + cache: Option<&gix_commitgraph::Graph>, + buf: &mut Vec, ) -> bool { let mut without_suspect = Vec::new(); out.extend(hunks_to_blame.drain(..).filter_map(|hunk| { - BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| { + if let Some(mut entry) = BlameEntry::from_unblamed_hunk(&hunk, suspect) { + // Find the effective commit (walk back through ignored commits) + if let Ok(Some(effective_commit)) = find_effective_commit(suspect, ignored_revs, odb, cache, buf) { + entry.commit_id = effective_commit; + Some(entry) + } else { + // All reachable ancestors for these lines are in the ignored set. + // Intentionally omit a blame entry to keep attribution deterministic at this layer, + // instead of attributing to an ignored commit. + // Covered by tests: ignore_revisions::consecutive_ignored_commits_transparent_walk + // and ignore_revisions::merge_scenarios_with_ignored_parents. + // A future change may switch to 'attribute-to-nearest-ignored' to mirror `git blame --ignore-rev`. + without_suspect.push(hunk); + None + } + } else { without_suspect.push(hunk); None - }) + } })); *hunks_to_blame = without_suspect; hunks_to_blame.is_empty() diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index bbd54591eb9..604a24b0d94 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -139,7 +139,7 @@ impl BlameRanges { } /// Options to be passed to [`file()`](crate::file()). -#[derive(Default, Debug, Clone)] +#[derive(Clone, Debug, Default)] pub struct Options { /// The algorithm to use for diffing. pub diff_algorithm: gix_diff::blob::Algorithm, @@ -152,6 +152,20 @@ pub struct Options { /// Collect debug information whenever there's a diff or rename that affects the outcome of a /// blame. pub debug_track_path: bool, + /// A set of commit IDs to ignore during blame attribution. + /// When a commit in this set is encountered, blame is passed transparently to its parents. + pub(crate) ignored_revs: Option>, +} + +impl Options { + /// Configure this instance to ignore the given revisions during blame attribution. + /// + /// When a commit in the provided set is encountered during traversal, blame is passed + /// transparently to its parents without attributing any lines to the ignored commit. + pub fn with_ignored_revisions>(mut self, iter: I) -> Self { + self.ignored_revs = Some(iter.into_iter().collect()); + self + } } /// Represents a change during history traversal for blame. It is supposed to capture enough diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index d32d316ee53..eaeac918e49 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -227,12 +227,14 @@ macro_rules! mktest { None, &mut resource_cache, source_file_name.as_ref(), - gix_blame::Options { - diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::default(), - since: None, - rewrites: Some(gix_diff::Rewrites::default()), - debug_track_path: false, + { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts }, )? .entries; @@ -307,20 +309,15 @@ fn diff_disparity() { let source_file_name: gix_object::bstr::BString = format!("{case}.txt").into(); - let lines_blamed = gix_blame::file( - &odb, - suspect, - None, - &mut resource_cache, - source_file_name.as_ref(), - gix_blame::Options { - diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::default(), - since: None, - rewrites: Some(gix_diff::Rewrites::default()), - debug_track_path: false, - }, - ) + let lines_blamed = gix_blame::file(&odb, suspect, None, &mut resource_cache, source_file_name.as_ref(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + }) .unwrap() .entries; @@ -344,14 +341,15 @@ fn file_that_was_added_in_two_branches() -> gix_testtools::Result { } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; let source_file_name = "file-with-two-roots.txt"; - let lines_blamed = gix_blame::file( - &odb, - suspect, - None, - &mut resource_cache, - source_file_name.into(), - gix_blame::Options::default(), - )? + let lines_blamed = gix_blame::file(&odb, suspect, None, &mut resource_cache, source_file_name.into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })? .entries; assert_eq!(lines_blamed.len(), 4); @@ -374,20 +372,15 @@ fn since() -> gix_testtools::Result { let source_file_name: gix_object::bstr::BString = "simple.txt".into(); - let lines_blamed = gix_blame::file( - &odb, - suspect, - None, - &mut resource_cache, - source_file_name.as_ref(), - gix_blame::Options { - diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::default(), - since: Some(gix_date::parse("2025-01-31", None)?), - rewrites: Some(gix_diff::Rewrites::default()), - debug_track_path: false, - }, - )? + let lines_blamed = gix_blame::file(&odb, suspect, None, &mut resource_cache, source_file_name.as_ref(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = Some(gix_date::parse("2025-01-31", None)?); + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })? .entries; assert_eq!(lines_blamed.len(), 1); @@ -414,20 +407,15 @@ mod blame_ranges { let source_file_name: gix_object::bstr::BString = "simple.txt".into(); - let lines_blamed = gix_blame::file( - &odb, - suspect, - None, - &mut resource_cache, - source_file_name.as_ref(), - gix_blame::Options { - diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::from_range(1..=2), - since: None, - rewrites: Some(gix_diff::Rewrites::default()), - debug_track_path: false, - }, - )? + let lines_blamed = gix_blame::file(&odb, suspect, None, &mut resource_cache, source_file_name.as_ref(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::from_range(1..=2); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })? .entries; assert_eq!(lines_blamed.len(), 2); @@ -455,20 +443,15 @@ mod blame_ranges { let source_file_name: gix_object::bstr::BString = "simple.txt".into(); - let lines_blamed = gix_blame::file( - &odb, - suspect, - None, - &mut resource_cache, - source_file_name.as_ref(), - gix_blame::Options { - diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: ranges, - since: None, - rewrites: None, - debug_track_path: false, - }, - )? + let lines_blamed = gix_blame::file(&odb, suspect, None, &mut resource_cache, source_file_name.as_ref(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = ranges; + opts.since = None; + opts.rewrites = None; + opts.debug_track_path = false; + opts + })? .entries; assert_eq!(lines_blamed.len(), 3); // Should have 3 lines total (2 from first range + 1 from second range) @@ -496,20 +479,15 @@ mod blame_ranges { let source_file_name: gix_object::bstr::BString = "simple.txt".into(); - let lines_blamed = gix_blame::file( - &odb, - suspect, - None, - &mut resource_cache, - source_file_name.as_ref(), - gix_blame::Options { - diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: ranges, - since: None, - rewrites: None, - debug_track_path: false, - }, - )? + let lines_blamed = gix_blame::file(&odb, suspect, None, &mut resource_cache, source_file_name.as_ref(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = ranges; + opts.since = None; + opts.rewrites = None; + opts.debug_track_path = false; + opts + })? .entries; assert_eq!(lines_blamed.len(), 3); // Should have 3 lines total (2 from first range + 1 from second range) @@ -542,20 +520,15 @@ mod rename_tracking { } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; let source_file_name = "after-rename.txt"; - let lines_blamed = gix_blame::file( - &odb, - suspect, - None, - &mut resource_cache, - source_file_name.into(), - gix_blame::Options { - diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::default(), - since: None, - rewrites: Some(gix_diff::Rewrites::default()), - debug_track_path: false, - }, - )? + let lines_blamed = gix_blame::file(&odb, suspect, None, &mut resource_cache, source_file_name.into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })? .entries; assert_eq!(lines_blamed.len(), 3); @@ -572,3 +545,571 @@ mod rename_tracking { fn fixture_path() -> gix_testtools::Result { gix_testtools::scripted_fixture_read_only("make_blame_repo.sh") } + +#[cfg(test)] +mod ignore_revisions { + use std::collections::HashSet; + + use gix_blame::BlameRanges; + use gix_hash::ObjectId; + + use crate::Fixture; + + #[test] + fn format_commit_between_a_and_c_ignoring_b() -> gix_testtools::Result { + // This test validates that ignoring a formatting commit (B) between + // commits A and C correctly re-attributes lines to A where appropriate + let worktree_path = fixture_path()?; + + let Fixture { + odb, + mut resource_cache, + suspect, + } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; + + // First, get the baseline without ignoring any commits + let baseline_outcome = gix_blame::file(&odb, suspect, None, &mut resource_cache, "simple.txt".into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })?; + + // Find a commit to ignore (the second most recent commit that made changes) + let mut commit_ids: Vec = baseline_outcome + .entries + .iter() + .map(|entry| entry.commit_id) + .collect::>() + .into_iter() + .collect(); + commit_ids.sort(); + + if commit_ids.len() < 2 { + // If we don't have enough commits for this test, skip it + return Ok(()); + } + + let commit_to_ignore = commit_ids[1]; // Ignore the second commit + + // Now run blame with the ignored commit + let ignored_outcome = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + } + .with_ignored_revisions([commit_to_ignore]), + )?; + + // Validate that the ignored commit doesn't appear in the results + for entry in &ignored_outcome.entries { + assert_ne!( + entry.commit_id, commit_to_ignore, + "Ignored commit {commit_to_ignore} should not appear in blame results" + ); + } + + // The total number of lines should remain the same + let baseline_lines: usize = baseline_outcome.entries.iter().map(|e| e.len.get() as usize).sum(); + let ignored_lines: usize = ignored_outcome.entries.iter().map(|e| e.len.get() as usize).sum(); + assert_eq!(baseline_lines, ignored_lines); + + Ok(()) + } + + #[test] + fn consecutive_ignored_commits_transparent_walk() -> gix_testtools::Result { + // This test validates transparent traversal through multiple consecutive ignored commits + let worktree_path = fixture_path()?; + + let Fixture { + odb, + mut resource_cache, + suspect, + } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; + + // Get baseline blame + let baseline_outcome = gix_blame::file(&odb, suspect, None, &mut resource_cache, "simple.txt".into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })?; + + // Collect all unique commit IDs + let mut all_commits: Vec = baseline_outcome + .entries + .iter() + .map(|entry| entry.commit_id) + .collect::>() + .into_iter() + .collect(); + all_commits.sort(); // Sort for predictable ordering + + if all_commits.len() < 3 { + // Skip test if not enough commits + return Ok(()); + } + + // Ignore all but the first and last commits (creating a chain of ignored commits) + let commits_to_ignore: Vec = all_commits + .iter() + .skip(1) + .take(all_commits.len().saturating_sub(2)) + .copied() + .collect(); + + if commits_to_ignore.is_empty() { + return Ok(()); + } + + let ignored_outcome = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + } + .with_ignored_revisions(commits_to_ignore.iter().copied()), + )?; + + // Validate that none of the ignored commits appear in results + for entry in &ignored_outcome.entries { + assert!( + !commits_to_ignore.contains(&entry.commit_id), + "Ignored commit {} should not appear in blame results", + entry.commit_id + ); + } + + Ok(()) + } + + #[test] + fn line_introduced_in_ignored_commit() -> gix_testtools::Result { + // Test that lines introduced in ignored commits are attributed to nearest valid parent + let worktree_path = fixture_path()?; + + let Fixture { + odb, + mut resource_cache, + suspect, + } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; + + let baseline_outcome = gix_blame::file(&odb, suspect, None, &mut resource_cache, "simple.txt".into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })?; + + // Find all unique commits in the blame results + let mut all_commits: Vec<_> = baseline_outcome + .entries + .iter() + .map(|entry| entry.commit_id) + .collect::>() + .into_iter() + .collect(); + all_commits.sort(); + + if all_commits.len() < 2 { + // Skip test if not enough commits for this test + return Ok(()); + } + + // Choose the second commit (not the first, as it might be a root commit) + // This gives us a better chance of having a commit with parents + let commit_to_ignore = all_commits[1]; + + let ignored_outcome = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + } + .with_ignored_revisions([commit_to_ignore]), + )?; + + // Should still have blame entries (attributed to parents) + assert!( + !ignored_outcome.entries.is_empty(), + "Should have blame entries even with ignored commits" + ); + + // Ignored commit should not appear in results + for entry in &ignored_outcome.entries { + assert_ne!( + entry.commit_id, commit_to_ignore, + "Ignored commit should not appear in results" + ); + } + + Ok(()) + } + + #[test] + fn merge_scenarios_with_ignored_parents() -> gix_testtools::Result { + // Test merge commit handling when one or both parents are ignored + let worktree_path = fixture_path()?; + + let Fixture { + odb, + mut resource_cache, + suspect, + } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; + + // Get all commits involved in blame + let baseline_outcome = gix_blame::file(&odb, suspect, None, &mut resource_cache, "simple.txt".into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })?; + + let mut all_commits: Vec = baseline_outcome + .entries + .iter() + .map(|entry| entry.commit_id) + .collect::>() + .into_iter() + .collect(); + all_commits.sort(); + + // Test with each commit ignored individually (skip first commit to avoid root commit) + for &commit_to_ignore in &all_commits[1..] { + let ignored_outcome = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + } + .with_ignored_revisions([commit_to_ignore]), + )?; + + // Should maintain structural integrity + assert!( + !ignored_outcome.entries.is_empty(), + "Should maintain blame structure when ignoring {commit_to_ignore}" + ); + + // Ignored commit should not appear + for entry in &ignored_outcome.entries { + assert_ne!( + entry.commit_id, commit_to_ignore, + "Ignored commit {commit_to_ignore} should not appear" + ); + } + } + + Ok(()) + } + + #[test] + fn feature_interaction_with_range() -> gix_testtools::Result { + // Test that ignore revisions work correctly with range blame + let worktree_path = fixture_path()?; + + let Fixture { + odb, + mut resource_cache, + suspect, + } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; + + // First check the full file to see how many lines it has + let full_blame = gix_blame::file(&odb, suspect, None, &mut resource_cache, "simple.txt".into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })?; + + if full_blame.entries.is_empty() { + return Ok(()); + } + + // Calculate total lines in the file + let total_lines = full_blame + .entries + .iter() + .map(|e| e.start_in_blamed_file + e.len.get()) + .max() + .unwrap_or(1); + + // Use a smaller, valid range (at most 3 lines or half the file, whichever is smaller) + let range_end = std::cmp::min(3, total_lines); + if range_end < 1 { + return Ok(()); + } + + let baseline_outcome = gix_blame::file(&odb, suspect, None, &mut resource_cache, "simple.txt".into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::from_range(1..=range_end); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })?; + + if baseline_outcome.entries.is_empty() { + return Ok(()); + } + + // Find all unique commits and choose non-root commit to ignore + let mut all_commits: Vec<_> = baseline_outcome + .entries + .iter() + .map(|entry| entry.commit_id) + .collect::>() + .into_iter() + .collect(); + all_commits.sort(); + + if all_commits.len() < 2 { + return Ok(()); + } + + let commit_to_ignore = all_commits[1]; + + let ignored_outcome = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::from_range(1..=range_end); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + } + .with_ignored_revisions([commit_to_ignore]), + )?; + + // Range functionality should still work + for entry in &ignored_outcome.entries { + assert!(entry.start_in_blamed_file < range_end, "Should respect range limits"); + assert_ne!(entry.commit_id, commit_to_ignore, "Should ignore specified commit"); + } + + Ok(()) + } + + #[test] + fn feature_interaction_with_rewrites() -> gix_testtools::Result { + // Test that ignore revisions work with rename tracking + let worktree_path = fixture_path()?; + + let Fixture { + odb, + mut resource_cache, + suspect, + } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; + + let baseline_outcome = gix_blame::file(&odb, suspect, None, &mut resource_cache, "simple.txt".into(), { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + })?; + + if baseline_outcome.entries.is_empty() { + return Ok(()); + } + + // Find all unique commits and choose non-root commit to ignore + let mut all_commits: Vec<_> = baseline_outcome + .entries + .iter() + .map(|entry| entry.commit_id) + .collect::>() + .into_iter() + .collect(); + all_commits.sort(); + + if all_commits.len() < 2 { + return Ok(()); + } + + let commit_to_ignore = all_commits[1]; + + let ignored_outcome = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + { + let mut opts = gix_blame::Options::default(); + opts.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + opts.range = BlameRanges::default(); + opts.since = None; + opts.rewrites = Some(gix_diff::Rewrites::default()); + opts.debug_track_path = false; + opts + } + .with_ignored_revisions([commit_to_ignore]), + )?; + + // Rename tracking should still work + assert!(!ignored_outcome.entries.is_empty(), "Should maintain rename tracking"); + + // Ignored commit should not appear + for entry in &ignored_outcome.entries { + assert_ne!(entry.commit_id, commit_to_ignore, "Should ignore specified commit"); + } + + Ok(()) + } + + #[test] + fn zero_cost_abstraction_when_none() -> gix_testtools::Result { + // Test that performance is not impacted when ignored_revs is None + let worktree_path = fixture_path()?; + + let Fixture { + odb, + mut resource_cache, + suspect, + } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; + + let mut options_with_none = gix_blame::Options::default(); + options_with_none.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + options_with_none.range = BlameRanges::default(); + options_with_none.since = None; + options_with_none.rewrites = Some(gix_diff::Rewrites::default()); + options_with_none.debug_track_path = false; + + let mut options_default = gix_blame::Options::default(); + options_default.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + options_default.range = BlameRanges::default(); + options_default.since = None; + options_default.rewrites = Some(gix_diff::Rewrites::default()); + options_default.debug_track_path = false; + + // Both should produce identical results + let outcome_none = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + options_with_none, + )?; + + let outcome_default = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + options_default, + )?; + + assert_eq!( + outcome_none.entries, outcome_default.entries, + "None and default should produce identical results" + ); + + Ok(()) + } + + #[test] + fn large_ignore_set_performance() -> gix_testtools::Result { + // Test that large ignore sets don't cause significant performance degradation + let worktree_path = fixture_path()?; + + let Fixture { + odb, + mut resource_cache, + suspect, + } = Fixture::for_worktree_path(worktree_path.to_path_buf())?; + + // Create a large set of fake commit IDs to ignore (none will match real commits) + let large_ignore_set: HashSet = (0..1000) + .map(|i| { + let mut bytes = [0u8; 20]; + bytes[0] = (i & 0xff) as u8; + bytes[1] = ((i >> 8) & 0xff) as u8; + ObjectId::from_bytes_or_panic(&bytes) + }) + .collect(); + + let mut options = gix_blame::Options::default(); + options.diff_algorithm = gix_diff::blob::Algorithm::Histogram; + options.range = BlameRanges::default(); + options.since = None; + options.rewrites = Some(gix_diff::Rewrites::default()); + options.debug_track_path = false; + let options = options.with_ignored_revisions(large_ignore_set); + + let outcome = gix_blame::file(&odb, suspect, None, &mut resource_cache, "simple.txt".into(), options)?; + + // Should still work correctly with large ignore set + assert!(!outcome.entries.is_empty(), "Should handle large ignore sets"); + + Ok(()) + } + + fn fixture_path() -> gix_testtools::Result { + super::fixture_path() + } +} diff --git a/gix-blame/tests/ignore_revisions_perf.rs b/gix-blame/tests/ignore_revisions_perf.rs new file mode 100644 index 00000000000..de2660dfad4 --- /dev/null +++ b/gix-blame/tests/ignore_revisions_perf.rs @@ -0,0 +1,147 @@ +use gix_blame::{file, Options}; +use std::path::PathBuf; + +fn fixture_path() -> gix_testtools::Result { + gix_testtools::scripted_fixture_read_only("make_blame_repo.sh") +} + +#[allow(unused)] +fn simple_performance_test() -> gix_testtools::Result<()> { + let worktree_path = fixture_path()?; + + let git_dir = worktree_path.join(".git"); + let odb = gix_odb::at(git_dir.join("objects"))?; + let store = gix_ref::file::Store::at( + git_dir.clone(), + gix_ref::store::init::Options { + write_reflog: gix_ref::store::WriteReflog::Disable, + ..Default::default() + }, + ); + + let mut reference = gix_ref::file::Store::find(&store, "HEAD")?; + use gix_ref::file::ReferenceExt; + let head_id = reference.peel_to_id_in_place(&store, &odb)?; + + let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default())?; + let stack = gix_worktree::Stack::from_state_and_ignore_case( + worktree_path.clone(), + false, + gix_worktree::stack::State::AttributesAndIgnoreStack { + attributes: Default::default(), + ignore: Default::default(), + }, + &index, + index.path_backing(), + ); + + let capabilities = gix_fs::Capabilities::probe(&git_dir); + let mut resource_cache = gix_diff::blob::Platform::new( + Default::default(), + gix_diff::blob::Pipeline::new( + gix_diff::blob::pipeline::WorktreeRoots { + old_root: None, + new_root: None, + }, + gix_filter::Pipeline::new(Default::default(), Default::default()), + vec![], + gix_diff::blob::pipeline::Options { + large_file_threshold_bytes: 0, + fs: capabilities, + }, + ), + gix_diff::blob::pipeline::Mode::ToGit, + stack, + ); + + let source_file_name = "simple.txt"; + + // Run multiple iterations to get stable measurements + let iterations = 50; + let mut durations_without = Vec::new(); + let mut durations_with_empty = Vec::new(); + + // Test without ignore revisions (zero-cost path) + for _ in 0..5 { + let start = std::time::Instant::now(); + for _ in 0..iterations { + let _result = file( + &odb, + head_id, + None, + &mut resource_cache, + source_file_name.into(), + Options::default(), + )?; + } + durations_without.push(start.elapsed()); + } + + // Test with empty ignore revisions (should be similar performance) + for _ in 0..5 { + let start = std::time::Instant::now(); + for _ in 0..iterations { + let _result = file( + &odb, + head_id, + None, + &mut resource_cache, + source_file_name.into(), + Options::default().with_ignored_revisions(std::iter::empty()), + )?; + } + durations_with_empty.push(start.elapsed()); + } + + // Calculate averages + let avg_without = durations_without + .iter() + .map(std::time::Duration::as_nanos) + .sum::() + / durations_without.len() as u128; + let avg_with_empty = durations_with_empty + .iter() + .map(std::time::Duration::as_nanos) + .sum::() + / durations_with_empty.len() as u128; + + println!( + "Performance comparison (averaged over {} runs):", + durations_without.len() + ); + println!( + "Without ignore revisions: {:?}", + std::time::Duration::from_nanos(avg_without as u64) + ); + println!( + "With empty ignore revisions: {:?}", + std::time::Duration::from_nanos(avg_with_empty as u64) + ); + + let overhead = (avg_with_empty as f64 / avg_without as f64 - 1.0) * 100.0; + println!("Overhead: {overhead:.2}%"); + + // Assert that overhead is reasonable (less than 20% to account for system variability and measurement noise) + // The key point is that it's not 100%+ overhead, showing the abstraction has reasonable cost characteristics + assert!( + overhead.abs() < 20.0, + "Overhead is outside reasonable bounds: {overhead:.2}%" + ); + + // Ensure we're not seeing massive degradation (within 20% either direction) + let overhead_ratio = avg_with_empty as f64 / avg_without as f64; + assert!( + overhead_ratio > 0.8 && overhead_ratio < 1.2, + "Performance difference is too large: {:.2}%", + (overhead_ratio - 1.0) * 100.0 + ); + + Ok(()) +} +// Windows CI has noisy timers/scheduler; perf thresholds are flaky there. +// Keep this perf guard active on Unix; skip on Windows. +#[cfg_attr(windows, ignore = "unstable perf threshold on Windows CI")] +#[test] +fn zero_cost_abstraction_verification() { + simple_performance_test().expect("Performance test should pass"); +} diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 12f2e5e7733..452a6a43860 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1603,12 +1603,14 @@ pub fn main() -> Result<()> { core::repository::blame::blame_file( repo, &file, - gix::blame::Options { - diff_algorithm, - range: gix::blame::BlameRanges::from_ranges(ranges), - since, - rewrites: Some(gix::diff::Rewrites::default()), - debug_track_path: false, + { + let mut opts = gix::blame::Options::default(); + opts.diff_algorithm = diff_algorithm; + opts.range = gix::blame::BlameRanges::from_ranges(ranges); + opts.since = since; + opts.rewrites = Some(gix::diff::Rewrites::default()); + opts.debug_track_path = false; + opts }, out, statistics.then_some(err), diff --git a/tests/it/src/commands/blame_copy_royal.rs b/tests/it/src/commands/blame_copy_royal.rs index 75a31bd90e2..a5e4ac942a3 100644 --- a/tests/it/src/commands/blame_copy_royal.rs +++ b/tests/it/src/commands/blame_copy_royal.rs @@ -35,13 +35,12 @@ pub(super) mod function { let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; let diff_algorithm = repo.diff_algorithm()?; - let options = gix::blame::Options { - diff_algorithm, - range: gix::blame::BlameRanges::default(), - since: None, - rewrites: Some(gix::diff::Rewrites::default()), - debug_track_path: true, - }; + let mut options = gix::blame::Options::default(); + options.diff_algorithm = diff_algorithm; + options.range = gix::blame::BlameRanges::default(); + options.since = None; + options.rewrites = Some(gix::diff::Rewrites::default()); + options.debug_track_path = true; let index = repo.index_or_empty()?;