From 31e27fbed0aa7a0f158ef1c5dbb202ec47fe5c91 Mon Sep 17 00:00:00 2001 From: gghez Date: Tue, 24 Mar 2026 03:48:13 +0100 Subject: [PATCH] fix(diff): check modified count in identical-files guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Files are identical" check only tested added and removed counters, ignoring modified lines. When two lines shared enough characters to be classified as a modification (similarity > 0.5), they incremented modified but not added/removed, so the guard passed and the diff reported no changes. Also replace the Jaccard set-based similarity function with a positional prefix+suffix metric that better reflects actual line differences — the old approach used unique character sets, which over-estimated similarity for lines sharing common characters in different positions. Fixes #781 Signed-off-by: gghez --- src/diff_cmd.rs | 78 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/src/diff_cmd.rs b/src/diff_cmd.rs index d9299eb5..5faf58c3 100644 --- a/src/diff_cmd.rs +++ b/src/diff_cmd.rs @@ -21,7 +21,7 @@ pub fn run(file1: &Path, file2: &Path, verbose: u8) -> Result<()> { let diff = compute_diff(&lines1, &lines2); let mut rtk = String::new(); - if diff.added == 0 && diff.removed == 0 { + if diff.added == 0 && diff.removed == 0 && diff.modified == 0 { rtk.push_str("[ok] Files are identical"); println!("{}", rtk); timer.track( @@ -143,17 +143,26 @@ fn compute_diff(lines1: &[&str], lines2: &[&str]) -> DiffResult { } fn similarity(a: &str, b: &str) -> f64 { - let a_chars: std::collections::HashSet = a.chars().collect(); - let b_chars: std::collections::HashSet = b.chars().collect(); - - let intersection = a_chars.intersection(&b_chars).count(); - let union = a_chars.union(&b_chars).count(); - - if union == 0 { - 1.0 - } else { - intersection as f64 / union as f64 + let max_len = a.len().max(b.len()); + if max_len == 0 { + return 1.0; } + + let common_prefix = a + .chars() + .zip(b.chars()) + .take_while(|(ca, cb)| ca == cb) + .count(); + let common_suffix = a + .chars() + .rev() + .zip(b.chars().rev()) + .take_while(|(ca, cb)| ca == cb) + .count(); + + // Avoid double-counting when strings overlap entirely + let matching = (common_prefix + common_suffix).min(max_len); + matching as f64 / max_len as f64 } fn condense_unified_diff(diff: &str) -> String { @@ -236,8 +245,8 @@ mod tests { #[test] fn test_similarity_partial_overlap() { let s = similarity("abcd", "abef"); - // Shared: a, b. Union: a, b, c, d, e, f = 6. Jaccard = 2/6 - assert!((s - 2.0 / 6.0).abs() < f64::EPSILON); + // Common prefix: "ab" (2). Common suffix: "" (0). max_len=4. ratio=2/4=0.5 + assert!((s - 0.5).abs() < f64::EPSILON); } #[test] @@ -364,4 +373,47 @@ diff --git a/b.rs b/b.rs let result = condense_unified_diff(""); assert!(result.is_empty()); } + + // --- regression: issue #781 - modified lines must not report "identical" --- + + #[test] + fn test_compute_diff_single_char_difference_long_prefix() { + // "abcd" vs "abce" — only last char differs + let a = vec!["abcd"]; + let b = vec!["abce"]; + let result = compute_diff(&a, &b); + assert!( + result.added > 0 || result.removed > 0 || result.modified > 0, + "Diff must detect change between 'abcd' and 'abce'" + ); + } + + #[test] + fn test_compute_diff_foo_bar_vs_foo_baz() { + let a = vec!["foo bar"]; + let b = vec!["foo baz"]; + let result = compute_diff(&a, &b); + assert!( + result.added > 0 || result.removed > 0 || result.modified > 0, + "Diff must detect change between 'foo bar' and 'foo baz'" + ); + } + + #[test] + fn test_compute_diff_abc_def_vs_abc_deg() { + let a = vec!["abc def"]; + let b = vec!["abc deg"]; + let result = compute_diff(&a, &b); + assert!( + result.added > 0 || result.removed > 0 || result.modified > 0, + "Diff must detect change between 'abc def' and 'abc deg'" + ); + } + + #[test] + fn test_similarity_single_char_change_not_identical() { + // Lines differing by one char must not be treated as identical + assert!(similarity("abcd", "abce") < 1.0); + assert!(similarity("foo bar", "foo baz") < 1.0); + } }