From 31e27fbed0aa7a0f158ef1c5dbb202ec47fe5c91 Mon Sep 17 00:00:00 2001
From: gghez <gghez@users.noreply.github.com>
Date: Tue, 24 Mar 2026 03:48:13 +0100
Subject: [PATCH] fix(diff): check modified count in identical-files guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The "Files are identical" check only tested added and removed counters,
ignoring modified lines. When two lines shared enough characters to be
classified as a modification (similarity > 0.5), they incremented
modified but not added/removed, so the guard passed and the diff
reported no changes.

Also replace the Jaccard set-based similarity function with a
positional prefix+suffix metric that better reflects actual line
differences — the old approach used unique character sets, which
over-estimated similarity for lines sharing common characters in
different positions.

Fixes #781

Signed-off-by: gghez <gghez@users.noreply.github.com>
---
 src/diff_cmd.rs | 78 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 65 insertions(+), 13 deletions(-)
diff --git a/src/diff_cmd.rs b/src/diff_cmd.rs
index d9299eb5..5faf58c3 100644
--- a/src/diff_cmd.rs
+++ b/src/diff_cmd.rs
@@ -21,7 +21,7 @@ pub fn run(file1: &Path, file2: &Path, verbose: u8) -> Result<()> {
     let diff = compute_diff(&lines1, &lines2);
     let mut rtk = String::new();
 
-    if diff.added == 0 && diff.removed == 0 {
+    if diff.added == 0 && diff.removed == 0 && diff.modified == 0 {
         rtk.push_str("[ok] Files are identical");
         println!("{}", rtk);
         timer.track(
@@ -143,17 +143,26 @@ fn compute_diff(lines1: &[&str], lines2: &[&str]) -> DiffResult {
 }
 
 fn similarity(a: &str, b: &str) -> f64 {
-    let a_chars: std::collections::HashSet<char> = a.chars().collect();
-    let b_chars: std::collections::HashSet<char> = b.chars().collect();
-
-    let intersection = a_chars.intersection(&b_chars).count();
-    let union = a_chars.union(&b_chars).count();
-
-    if union == 0 {
-        1.0
-    } else {
-        intersection as f64 / union as f64
+    let max_len = a.len().max(b.len());
+    if max_len == 0 {
+        return 1.0;
     }
+
+    let common_prefix = a
+        .chars()
+        .zip(b.chars())
+        .take_while(|(ca, cb)| ca == cb)
+        .count();
+    let common_suffix = a
+        .chars()
+        .rev()
+        .zip(b.chars().rev())
+        .take_while(|(ca, cb)| ca == cb)
+        .count();
+
+    // Avoid double-counting when strings overlap entirely
+    let matching = (common_prefix + common_suffix).min(max_len);
+    matching as f64 / max_len as f64
 }
 
 fn condense_unified_diff(diff: &str) -> String {
@@ -236,8 +245,8 @@ mod tests {
     #[test]
     fn test_similarity_partial_overlap() {
         let s = similarity("abcd", "abef");
-        // Shared: a, b. Union: a, b, c, d, e, f = 6. Jaccard = 2/6
-        assert!((s - 2.0 / 6.0).abs() < f64::EPSILON);
+        // Common prefix: "ab" (2). Common suffix: "" (0). max_len=4. ratio=2/4=0.5
+        assert!((s - 0.5).abs() < f64::EPSILON);
     }
 
     #[test]
@@ -364,4 +373,47 @@ diff --git a/b.rs b/b.rs
         let result = condense_unified_diff("");
         assert!(result.is_empty());
     }
+
+    // --- regression: issue #781 - modified lines must not report "identical" ---
+
+    #[test]
+    fn test_compute_diff_single_char_difference_long_prefix() {
+        // "abcd" vs "abce" — only last char differs
+        let a = vec!["abcd"];
+        let b = vec!["abce"];
+        let result = compute_diff(&a, &b);
+        assert!(
+            result.added > 0 || result.removed > 0 || result.modified > 0,
+            "Diff must detect change between 'abcd' and 'abce'"
+        );
+    }
+
+    #[test]
+    fn test_compute_diff_foo_bar_vs_foo_baz() {
+        let a = vec!["foo bar"];
+        let b = vec!["foo baz"];
+        let result = compute_diff(&a, &b);
+        assert!(
+            result.added > 0 || result.removed > 0 || result.modified > 0,
+            "Diff must detect change between 'foo bar' and 'foo baz'"
+        );
+    }
+
+    #[test]
+    fn test_compute_diff_abc_def_vs_abc_deg() {
+        let a = vec!["abc def"];
+        let b = vec!["abc deg"];
+        let result = compute_diff(&a, &b);
+        assert!(
+            result.added > 0 || result.removed > 0 || result.modified > 0,
+            "Diff must detect change between 'abc def' and 'abc deg'"
+        );
+    }
+
+    #[test]
+    fn test_similarity_single_char_change_not_identical() {
+        // Lines differing by one char must not be treated as identical
+        assert!(similarity("abcd", "abce") < 1.0);
+        assert!(similarity("foo bar", "foo baz") < 1.0);
+    }
 }