From e5e4e841f822739631305823a4c118680f4d0ee2 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 10:58:01 -0400
Subject: [PATCH 01/26] refactor(patch): ParseOpts for configurable parsing
 behavior

---
 src/patch/parse.rs     | 46 +++++++++++++++++++++++++++++++-----------
 src/patch_set/parse.rs |  3 ++-
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/src/patch/parse.rs b/src/patch/parse.rs
index dd74b15..2c40209 100644
--- a/src/patch/parse.rs
+++ b/src/patch/parse.rs
@@ -12,6 +12,31 @@ use std::borrow::Cow;
 
 type Result<T, E = ParsePatchError> = std::result::Result<T, E>;
 
+/// Options that control parsing behavior.
+///
+/// Defaults match the [`parse`]/[`parse_bytes`] behavior.
+#[derive(Clone, Copy)]
+pub(crate) struct ParseOpts {
+    reject_orphaned_hunks: bool,
+}
+
+impl Default for ParseOpts {
+    fn default() -> Self {
+        Self {
+            reject_orphaned_hunks: false,
+        }
+    }
+}
+
+impl ParseOpts {
+    /// Reject orphaned `@@ ` hunk headers after parsed hunks,
+    /// matching `git apply` behavior.
+    pub(crate) fn reject_orphaned_hunks(mut self) -> Self {
+        self.reject_orphaned_hunks = true;
+        self
+    }
+}
+
 struct Parser<'a, T: Text + ?Sized> {
     lines: std::iter::Peekable<LineIter<'a, T>>,
     offset: usize,
@@ -54,7 +79,7 @@ impl<'a, T: Text + ?Sized> Parser<'a, T> {
 }
 
 pub fn parse(input: &str) -> Result<Patch<'_, str>> {
-    let (result, _consumed) = parse_one(input);
+    let (result, _consumed) = parse_one(input, ParseOpts::default());
     result
 }
 
@@ -62,7 +87,7 @@ pub fn parse(input: &str) -> Result<Patch<'_, str>> {
 ///
 /// Always returns consumed bytes alongside the result
 /// so callers can advance past the parsed or partially parsed content.
-pub(crate) fn parse_one(input: &str) -> (Result<Patch<'_, str>>, usize) {
+pub(crate) fn parse_one(input: &str, opts: ParseOpts) -> (Result<Patch<'_, str>>, usize) {
     let mut parser = Parser::new(input);
 
     let header = match patch_header(&mut parser) {
@@ -73,6 +98,11 @@ pub(crate) fn parse_one(input: &str) -> (Result<Patch<'_, str>>, usize) {
         Ok(h) => h,
         Err(e) => return (Err(e), parser.offset()),
     };
+    if opts.reject_orphaned_hunks {
+        if let Err(e) = reject_orphaned_hunk_headers(&mut parser) {
+            return (Err(e), parser.offset());
+        }
+    }
 
     let patch = Patch::new(
         header.0.map(convert_cow_to_str),
@@ -83,16 +113,8 @@ pub(crate) fn parse_one(input: &str) -> (Result<Patch<'_, str>>, usize) {
 }
 
 pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
-    let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser)?;
-    let hunks = hunks(&mut parser)?;
-    reject_orphaned_hunk_headers(&mut parser)?;
-
-    Ok(Patch::new(
-        header.0.map(convert_cow_to_str),
-        header.1.map(convert_cow_to_str),
-        hunks,
-    ))
+    let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks());
+    result
 }
 
 pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index c4864f9..cec612b 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -83,7 +83,8 @@ impl<'a> PatchSet<'a> {
 
         let patch_input = &remaining[patch_start..];
 
-        let (result, consumed) = parse_one(patch_input);
+        let opts = crate::patch::parse::ParseOpts::default();
+        let (result, consumed) = parse_one(patch_input, opts);
         // Always advance so the iterator makes progress even on error.
         let abs_patch_start = self.offset + patch_start;
         self.offset += patch_start + consumed;

From 1a83fe90451af5c497678508cf16b9e816f00344 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 11:05:06 -0400
Subject: [PATCH 02/26] refactor(patch): make preamble skipping configurable

---
 src/patch/parse.rs | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/patch/parse.rs b/src/patch/parse.rs
index 2c40209..b6b869b 100644
--- a/src/patch/parse.rs
+++ b/src/patch/parse.rs
@@ -17,18 +17,30 @@ type Result<T, E = ParsePatchError> = std::result::Result<T, E>;
 /// Defaults match the [`parse`]/[`parse_bytes`] behavior.
 #[derive(Clone, Copy)]
 pub(crate) struct ParseOpts {
+    skip_preamble: bool,
     reject_orphaned_hunks: bool,
 }
 
 impl Default for ParseOpts {
     fn default() -> Self {
         Self {
+            skip_preamble: true,
             reject_orphaned_hunks: false,
         }
     }
 }
 
 impl ParseOpts {
+    /// Don't skip preamble lines before `---`/`+++`/`@@`.
+    ///
+    /// Useful when the caller has already positioned the input
+    /// at the start of the patch content.
+    #[allow(dead_code)] // will be used by patch_set parser
+    pub(crate) fn no_skip_preamble(mut self) -> Self {
+        self.skip_preamble = false;
+        self
+    }
+
     /// Reject orphaned `@@ ` hunk headers after parsed hunks,
     /// matching `git apply` behavior.
     pub(crate) fn reject_orphaned_hunks(mut self) -> Self {
@@ -90,7 +102,7 @@ pub fn parse(input: &str) -> Result<Patch<'_, str>> {
 pub(crate) fn parse_one(input: &str, opts: ParseOpts) -> (Result<Patch<'_, str>>, usize) {
     let mut parser = Parser::new(input);
 
-    let header = match patch_header(&mut parser) {
+    let header = match patch_header(&mut parser, &opts) {
         Ok(h) => h,
         Err(e) => return (Err(e), parser.offset()),
     };
@@ -119,7 +131,7 @@ pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
 
 pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
     let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser)?;
+    let header = patch_header(&mut parser, &ParseOpts::default())?;
     let hunks = hunks(&mut parser)?;
 
     Ok(Patch::new(header.0, header.1, hunks))
@@ -127,7 +139,7 @@ pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
 
 pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
     let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser)?;
+    let header = patch_header(&mut parser, &ParseOpts::default())?;
     let hunks = hunks(&mut parser)?;
     reject_orphaned_hunk_headers(&mut parser)?;
 
@@ -145,8 +157,11 @@ fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
 #[allow(clippy::type_complexity)]
 fn patch_header<'a, T: Text + ToOwned + ?Sized>(
     parser: &mut Parser<'a, T>,
+    opts: &ParseOpts,
 ) -> Result<(Option<Cow<'a, [u8]>>, Option<Cow<'a, [u8]>>)> {
-    skip_header_preamble(parser)?;
+    if opts.skip_preamble {
+        skip_header_preamble(parser)?;
+    }
 
     let mut filename1 = None;
     let mut filename2 = None;

From d1ec23656a3089cd1d67a1e6c329aab5c0b5be72 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 11:09:12 -0400
Subject: [PATCH 03/26] refactor(patch): organize item order

---
 src/patch/parse.rs | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/patch/parse.rs b/src/patch/parse.rs
index b6b869b..4400aec 100644
--- a/src/patch/parse.rs
+++ b/src/patch/parse.rs
@@ -95,6 +95,28 @@ pub fn parse(input: &str) -> Result<Patch<'_, str>> {
     result
 }
 
+pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
+    let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks());
+    result
+}
+
+pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
+    let mut parser = Parser::new(input);
+    let header = patch_header(&mut parser, &ParseOpts::default())?;
+    let hunks = hunks(&mut parser)?;
+
+    Ok(Patch::new(header.0, header.1, hunks))
+}
+
+pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
+    let mut parser = Parser::new(input);
+    let header = patch_header(&mut parser, &ParseOpts::default())?;
+    let hunks = hunks(&mut parser)?;
+    reject_orphaned_hunk_headers(&mut parser)?;
+
+    Ok(Patch::new(header.0, header.1, hunks))
+}
+
 /// Parses one patch from input.
 ///
 /// Always returns consumed bytes alongside the result
@@ -124,28 +146,6 @@ pub(crate) fn parse_one(input: &str, opts: ParseOpts) -> (Result<Patch<'_, str>>
     (Ok(patch), parser.offset())
 }
 
-pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
-    let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks());
-    result
-}
-
-pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
-    let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser, &ParseOpts::default())?;
-    let hunks = hunks(&mut parser)?;
-
-    Ok(Patch::new(header.0, header.1, hunks))
-}
-
-pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
-    let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser, &ParseOpts::default())?;
-    let hunks = hunks(&mut parser)?;
-    reject_orphaned_hunk_headers(&mut parser)?;
-
-    Ok(Patch::new(header.0, header.1, hunks))
-}
-
 // This is only used when the type originated as a utf8 string
 fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
     match cow {

From fe9b1b3be77b04bb8ea61956a37443c1b7043a28 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 11:10:47 -0400
Subject: [PATCH 04/26] refactor(patch): wire up ParseOpts for byte parsing

---
 src/patch/parse.rs | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/src/patch/parse.rs b/src/patch/parse.rs
index 4400aec..3d7f3ca 100644
--- a/src/patch/parse.rs
+++ b/src/patch/parse.rs
@@ -101,20 +101,13 @@ pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
 }
 
 pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
-    let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser, &ParseOpts::default())?;
-    let hunks = hunks(&mut parser)?;
-
-    Ok(Patch::new(header.0, header.1, hunks))
+    let (result, _consumed) = parse_bytes_one(input, ParseOpts::default());
+    result
 }
 
 pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
-    let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser, &ParseOpts::default())?;
-    let hunks = hunks(&mut parser)?;
-    reject_orphaned_hunk_headers(&mut parser)?;
-
-    Ok(Patch::new(header.0, header.1, hunks))
+    let (result, _consumed) = parse_bytes_one(input, ParseOpts::default().reject_orphaned_hunks());
+    result
 }
 
 /// Parses one patch from input.
@@ -146,6 +139,27 @@ pub(crate) fn parse_one(input: &str, opts: ParseOpts) -> (Result<Patch<'_, str>>
     (Ok(patch), parser.offset())
 }
 
+/// Like [`parse_one`] but for bytes.
+pub(crate) fn parse_bytes_one(input: &[u8], opts: ParseOpts) -> (Result<Patch<'_, [u8]>>, usize) {
+    let mut parser = Parser::new(input);
+
+    let header = match patch_header(&mut parser, &opts) {
+        Ok(h) => h,
+        Err(e) => return (Err(e), parser.offset()),
+    };
+    let hunks = match hunks(&mut parser) {
+        Ok(h) => h,
+        Err(e) => return (Err(e), parser.offset()),
+    };
+    if opts.reject_orphaned_hunks {
+        if let Err(e) = reject_orphaned_hunk_headers(&mut parser) {
+            return (Err(e), parser.offset());
+        }
+    }
+
+    (Ok(Patch::new(header.0, header.1, hunks)), parser.offset())
+}
+
 // This is only used when the type originated as a utf8 string
 fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
     match cow {

From f4e6b2ba297cf8bd1d25f3be0c2986682f3a24d9 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 11:24:20 -0400
Subject: [PATCH 05/26] refactor(patch): deduplicate parse_one logic

---
 src/patch/parse.rs | 49 ++++++++++------------------------------------
 src/utils.rs       | 22 ++++++++++++++++-----
 2 files changed, 27 insertions(+), 44 deletions(-)

diff --git a/src/patch/parse.rs b/src/patch/parse.rs
index 3d7f3ca..c58dfd7 100644
--- a/src/patch/parse.rs
+++ b/src/patch/parse.rs
@@ -101,12 +101,12 @@ pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
 }
 
 pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
-    let (result, _consumed) = parse_bytes_one(input, ParseOpts::default());
+    let (result, _consumed) = parse_one(input, ParseOpts::default());
     result
 }
 
 pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
-    let (result, _consumed) = parse_bytes_one(input, ParseOpts::default().reject_orphaned_hunks());
+    let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks());
     result
 }
 
@@ -114,7 +114,10 @@ pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
 ///
 /// Always returns consumed bytes alongside the result
 /// so callers can advance past the parsed or partially parsed content.
-pub(crate) fn parse_one(input: &str, opts: ParseOpts) -> (Result<Patch<'_, str>>, usize) {
+pub(crate) fn parse_one<'a, T: Text + ?Sized>(
+    input: &'a T,
+    opts: ParseOpts,
+) -> (Result<Patch<'a, T>>, usize) {
     let mut parser = Parser::new(input);
 
     let header = match patch_header(&mut parser, &opts) {
@@ -132,44 +135,15 @@ pub(crate) fn parse_one(input: &str, opts: ParseOpts) -> (Result<Patch<'_, str>>
     }
 
     let patch = Patch::new(
-        header.0.map(convert_cow_to_str),
-        header.1.map(convert_cow_to_str),
+        header.0.map(T::from_bytes_cow),
+        header.1.map(T::from_bytes_cow),
         hunks,
     );
     (Ok(patch), parser.offset())
 }
 
-/// Like [`parse_one`] but for bytes.
-pub(crate) fn parse_bytes_one(input: &[u8], opts: ParseOpts) -> (Result<Patch<'_, [u8]>>, usize) {
-    let mut parser = Parser::new(input);
-
-    let header = match patch_header(&mut parser, &opts) {
-        Ok(h) => h,
-        Err(e) => return (Err(e), parser.offset()),
-    };
-    let hunks = match hunks(&mut parser) {
-        Ok(h) => h,
-        Err(e) => return (Err(e), parser.offset()),
-    };
-    if opts.reject_orphaned_hunks {
-        if let Err(e) = reject_orphaned_hunk_headers(&mut parser) {
-            return (Err(e), parser.offset());
-        }
-    }
-
-    (Ok(Patch::new(header.0, header.1, hunks)), parser.offset())
-}
-
-// This is only used when the type originated as a utf8 string
-fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
-    match cow {
-        Cow::Borrowed(b) => std::str::from_utf8(b).unwrap().into(),
-        Cow::Owned(o) => String::from_utf8(o).unwrap().into(),
-    }
-}
-
 #[allow(clippy::type_complexity)]
-fn patch_header<'a, T: Text + ToOwned + ?Sized>(
+fn patch_header<'a, T: Text + ?Sized>(
     parser: &mut Parser<'a, T>,
     opts: &ParseOpts,
 ) -> Result<(Option<Cow<'a, [u8]>>, Option<Cow<'a, [u8]>>)> {
@@ -212,10 +186,7 @@ fn skip_header_preamble<T: Text + ?Sized>(parser: &mut Parser<'_, T>) -> Result<
     Ok(())
 }
 
-fn parse_filename<'a, T: Text + ToOwned + ?Sized>(
-    prefix: &str,
-    line: &'a T,
-) -> Result<Cow<'a, [u8]>> {
+fn parse_filename<'a, T: Text + ?Sized>(prefix: &str, line: &'a T) -> Result<Cow<'a, [u8]>> {
     let line = line
         .strip_prefix(prefix)
         .ok_or(ParsePatchErrorKind::InvalidFilename)?;
diff --git a/src/utils.rs b/src/utils.rs
index c3b1594..4e98c95 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -132,7 +132,7 @@ impl<'a, T: Text + ?Sized> Iterator for LineIter<'a, T> {
 
 /// A helper trait for processing text like `str` and `[u8]`
 /// Useful for abstracting over those types for parsing as well as breaking input into lines
-pub trait Text: Eq + Hash {
+pub trait Text: Eq + Hash + ToOwned {
     fn is_empty(&self) -> bool;
     fn len(&self) -> usize;
     fn starts_with(&self, prefix: &str) -> bool;
@@ -148,6 +148,9 @@ pub trait Text: Eq + Hash {
     #[allow(unused)]
     fn lines(&self) -> LineIter<'_, Self>;
 
+    /// Converts a byte-oriented `Cow` into a `Cow` of `Self`.
+    fn from_bytes_cow(cow: Cow<'_, [u8]>) -> Cow<'_, Self>;
+
     fn parse<T: std::str::FromStr>(&self) -> Option<T> {
         self.as_str().and_then(|s| s.parse().ok())
     }
@@ -202,6 +205,13 @@ impl Text for str {
     fn lines(&self) -> LineIter<'_, Self> {
         LineIter::new(self)
     }
+
+    fn from_bytes_cow(cow: Cow<'_, [u8]>) -> Cow<'_, Self> {
+        match cow {
+            Cow::Borrowed(b) => Cow::Borrowed(std::str::from_utf8(b).unwrap()),
+            Cow::Owned(o) => Cow::Owned(String::from_utf8(o).unwrap()),
+        }
+    }
 }
 
 impl Text for [u8] {
@@ -252,6 +262,10 @@ impl Text for [u8] {
     fn lines(&self) -> LineIter<'_, Self> {
         LineIter::new(self)
     }
+
+    fn from_bytes_cow(cow: Cow<'_, [u8]>) -> Cow<'_, Self> {
+        cow
+    }
 }
 
 fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
@@ -292,7 +306,7 @@ fn find_byte(haystack: &[u8], byte: u8) -> Option<usize> {
 ///
 /// See [`byte_needs_quoting`] for the set of characters that
 /// require quoting.
-pub(crate) fn escaped_filename<T: Text + ToOwned + ?Sized>(
+pub(crate) fn escaped_filename<T: Text + ?Sized>(
     filename: &T,
 ) -> Result<Cow<'_, [u8]>, ParsePatchError> {
     if let Some(inner) = filename
@@ -309,9 +323,7 @@ pub(crate) fn escaped_filename<T: Text + ToOwned + ?Sized>(
     }
 }
 
-fn decode_escaped<T: Text + ToOwned + ?Sized>(
-    escaped: &T,
-) -> Result<Cow<'_, [u8]>, ParsePatchError> {
+fn decode_escaped<T: Text + ?Sized>(escaped: &T) -> Result<Cow<'_, [u8]>, ParsePatchError> {
     let bytes = escaped.as_bytes();
     let mut result = Vec::new();
     let mut i = 0;

From edf93e51e5d5945aa8472af6be468d20a60c8bc0 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 12:30:29 -0400
Subject: [PATCH 06/26] refactor(patch_set): extract `line_ending_len` helper

`.lines()` strips line endings, so callers tracking byte offsets
need to re-add the `\r\n` or `\n` length manually.
Extract the repeated inline pattern into a reusable helper.
---
 src/patch_set/parse.rs | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index cec612b..e93d7b6 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -142,12 +142,7 @@ fn find_patch_start(input: &str) -> Option<usize> {
             return Some(offset);
         }
         offset += line.len();
-        // Account for the line ending that `.lines()` strips
-        if input[offset..].starts_with("\r\n") {
-            offset += 2;
-        } else if input[offset..].starts_with('\n') {
-            offset += 1;
-        }
+        offset += line_ending_len(&input[offset..]);
     }
     None
 }
@@ -222,3 +217,17 @@ pub(crate) fn extract_file_op_unidiff<'a>(
         }
     }
 }
+
+/// Returns the length of the line ending at the start of `s`.
+///
+/// `.lines()` strips line endings, so callers tracking byte offsets need to
+/// account for the `\r\n` or `\n` that was consumed.
+fn line_ending_len(s: &str) -> usize {
+    if s.starts_with("\r\n") {
+        2
+    } else if s.starts_with('\n') {
+        1
+    } else {
+        0
+    }
+}

From 90861e0faee3c4ac143e523aeccb0b79e2ebaa8c Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 12:30:59 -0400
Subject: [PATCH 07/26] feat(patch_set): support git diff application

* Parse `diff --git` extended headers
* split multi-file git diffs at `diff --git` boundaries
---
 src/patch/parse.rs     |   1 -
 src/patch_set/error.rs |  14 ++
 src/patch_set/mod.rs   |  76 +++++++-
 src/patch_set/parse.rs | 431 ++++++++++++++++++++++++++++++++++++++++-
 src/patch_set/tests.rs | 121 ++++++++++++
 5 files changed, 638 insertions(+), 5 deletions(-)

diff --git a/src/patch/parse.rs b/src/patch/parse.rs
index c58dfd7..2ccb409 100644
--- a/src/patch/parse.rs
+++ b/src/patch/parse.rs
@@ -35,7 +35,6 @@ impl ParseOpts {
     ///
     /// Useful when the caller has already positioned the input
     /// at the start of the patch content.
-    #[allow(dead_code)] // will be used by patch_set parser
     pub(crate) fn no_skip_preamble(mut self) -> Self {
         self.skip_preamble = false;
         self
diff --git a/src/patch_set/error.rs b/src/patch_set/error.rs
index ba60e86..9cd6aa9 100644
--- a/src/patch_set/error.rs
+++ b/src/patch_set/error.rs
@@ -70,6 +70,15 @@ pub(crate) enum PatchSetParseErrorKind {
 
     /// Create patch missing modified path.
     CreateMissingModifiedPath,
+
+    /// Invalid file mode string.
+    InvalidFileMode(String),
+
+    /// Invalid `diff --git` path.
+    InvalidDiffGitPath,
+
+    /// Binary diff not supported in current configuration.
+    BinaryNotSupported { path: String },
 }
 
 impl fmt::Display for PatchSetParseErrorKind {
@@ -81,6 +90,11 @@ impl fmt::Display for PatchSetParseErrorKind {
             Self::BothDevNull => write!(f, "patch has both original and modified as /dev/null"),
             Self::DeleteMissingOriginalPath => write!(f, "delete patch has no original path"),
             Self::CreateMissingModifiedPath => write!(f, "create patch has no modified path"),
+            Self::InvalidFileMode(mode) => write!(f, "invalid file mode: {mode}"),
+            Self::InvalidDiffGitPath => write!(f, "invalid diff --git path"),
+            Self::BinaryNotSupported { path } => {
+                write!(f, "binary diff not supported: {path}")
+            }
         }
     }
 }
diff --git a/src/patch_set/mod.rs b/src/patch_set/mod.rs
index 7e15a6e..f83442c 100644
--- a/src/patch_set/mod.rs
+++ b/src/patch_set/mod.rs
@@ -13,11 +13,25 @@ use std::borrow::Cow;
 use crate::Patch;
 
 pub use error::PatchSetParseError;
+use error::PatchSetParseErrorKind;
 pub use parse::PatchSet;
 
 /// Options for parsing patch content.
 ///
-/// Use [`ParseOptions::unidiff()`] to create options for the desired format.
+/// Use [`ParseOptions::unidiff()`] or [`ParseOptions::gitdiff()`]
+/// to create options for the desired format.
+///
+/// ## Binary Files
+///
+/// When parsing git diffs, binary file changes are detected by:
+///
+/// * `Binary files a/path and b/path differ` (`git diff` without `--binary` flag)
+/// * `GIT binary patch` (from `git diff --binary`)
+///
+/// Note that this is not a documented Git behavior,
+/// so the implementation here is subject to change if Git changes.
+///
+/// By default, binary diffs are skipped.
 ///
 /// ## Example
 ///
@@ -40,12 +54,26 @@ pub use parse::PatchSet;
 #[derive(Debug, Clone)]
 pub struct ParseOptions {
     pub(crate) format: Format,
+    pub(crate) binary: Binary,
 }
 
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, Default)]
 pub(crate) enum Format {
     /// Standard unified diff format.
+    #[default]
     UniDiff,
+    /// Git extended diff format.
+    GitDiff,
+}
+
+/// How to handle binary diffs in GitDiff mode.
+#[derive(Debug, Clone, Copy, Default)]
+pub(crate) enum Binary {
+    /// Skip binary diffs silently.
+    #[default]
+    Skip,
+    /// Return error if binary diff encountered.
+    Fail,
 }
 
 impl ParseOptions {
@@ -64,8 +92,38 @@ impl ParseOptions {
     pub fn unidiff() -> Self {
         Self {
             format: Format::UniDiff,
+            binary: Default::default(),
         }
     }
+
+    /// Parse as [git extended diff format][git-diff-format].
+    ///
+    /// Supports all features of [`unidiff()`](Self::unidiff) plus:
+    ///
+    /// * `diff --git` headers
+    /// * Extended headers (`new file mode`, `deleted file mode`, etc.)
+    /// * Rename/copy detection (`rename from`/`rename to`, `copy from`/`copy to`)
+    /// * Binary file detection (skipped by default)
+    ///
+    /// [git-diff-format]: https://git-scm.com/docs/diff-format
+    pub fn gitdiff() -> Self {
+        Self {
+            format: Format::GitDiff,
+            binary: Default::default(),
+        }
+    }
+
+    /// Skip binary diffs silently.
+    pub fn skip_binary(mut self) -> Self {
+        self.binary = Binary::Skip;
+        self
+    }
+
+    /// Return an error if a binary diff is encountered.
+    pub fn fail_on_binary(mut self) -> Self {
+        self.binary = Binary::Fail;
+        self
+    }
 }
 
 /// File mode extracted from git extended headers.
@@ -81,6 +139,20 @@ pub enum FileMode {
     Gitlink,
 }
 
+impl std::str::FromStr for FileMode {
+    type Err = PatchSetParseError;
+
+    fn from_str(mode: &str) -> Result<Self, Self::Err> {
+        match mode {
+            "100644" => Ok(Self::Regular),
+            "100755" => Ok(Self::Executable),
+            "120000" => Ok(Self::Symlink),
+            "160000" => Ok(Self::Gitlink),
+            _ => Err(PatchSetParseErrorKind::InvalidFileMode(mode.to_owned()).into()),
+        }
+    }
+}
+
 /// The kind of patch content in a [`FilePatch`].
 #[derive(Clone, PartialEq, Eq)]
 pub enum PatchKind<'a, T: ToOwned + ?Sized> {
diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index e93d7b6..0d8b994 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -1,10 +1,12 @@
 //! Parse multiple file patches from a unified diff.
 
 use super::{
-    error::PatchSetParseErrorKind, FileOperation, FilePatch, Format, ParseOptions,
-    PatchSetParseError,
+    error::PatchSetParseErrorKind, Binary, FileMode, FileOperation, FilePatch, Format,
+    ParseOptions, PatchSetParseError,
 };
 use crate::patch::parse::parse_one;
+use crate::utils::escaped_filename;
+use crate::Patch;
 
 use std::borrow::Cow;
 
@@ -72,6 +74,71 @@ impl<'a> PatchSet<'a> {
         PatchSetParseError::new(kind, self.offset..self.offset)
     }
 
+    fn next_gitdiff_patch(&mut self) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
+        let remaining = &self.input[self.offset..];
+        let patch_start = find_gitdiff_start(remaining)?;
+        self.offset += patch_start;
+        self.found_any = true;
+
+        let abs_patch_start = self.offset;
+
+        // Parse extended headers incrementally — stops at first unrecognized line
+        let (header, header_consumed) = GitHeader::parse(&self.input[self.offset..]);
+        self.offset += header_consumed;
+
+        // Handle binary markers ("Binary files ... differ")
+        if header.is_binary_marker || header.is_binary_patch {
+            match self.opts.binary {
+                Binary::Skip => {
+                    return self.next_gitdiff_patch();
+                }
+                Binary::Fail => {
+                    let path = header.diff_git_line.unwrap_or("<unknown>").to_owned();
+                    return Some(Err(PatchSetParseError::new(
+                        PatchSetParseErrorKind::BinaryNotSupported { path },
+                        abs_patch_start..abs_patch_start,
+                    )));
+                }
+            }
+        }
+
+        // `git diff` output format is stricter.
+        // There is no preamble between Git headers and unidiff patch portion,
+        // so we safely don't perform the preamble skipping.
+        //
+        // If we did, it would fail the pure rename/mode-change operation
+        // since those ops have no unidiff patch portion
+        // and is directly followed by the next `diff --git` header.
+        let opts = crate::patch::parse::ParseOpts::default().no_skip_preamble();
+        let remaining = &self.input[self.offset..];
+        let (result, consumed) = parse_one(remaining, opts);
+        self.offset += consumed;
+        let patch = match result {
+            Ok(patch) => patch,
+            Err(e) => return Some(Err(e.into())),
+        };
+
+        // FIXME: error spans point at `diff --git` line, not the specific offending line
+        let operation = match extract_file_op_gitdiff(&header, &patch) {
+            Ok(op) => op,
+            Err(mut e) => {
+                e.set_span(abs_patch_start..abs_patch_start);
+                return Some(Err(e));
+            }
+        };
+
+        // FIXME: error spans point at `diff --git` line, not the specific offending line
+        let (old_mode, new_mode) = match parse_file_modes(&header) {
+            Ok(modes) => modes,
+            Err(mut e) => {
+                e.set_span(abs_patch_start..abs_patch_start);
+                return Some(Err(e));
+            }
+        };
+
+        Some(Ok(FilePatch::new(operation, patch, old_mode, new_mode)))
+    }
+
     fn next_unidiff_patch(&mut self) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
         let remaining = &self.input[self.offset..];
         if remaining.is_empty() {
@@ -125,6 +192,16 @@ impl<'a> Iterator for PatchSet<'a> {
                 }
                 result
             }
+            Format::GitDiff => {
+                let result = self.next_gitdiff_patch();
+                if result.is_none() {
+                    self.finished = true;
+                    if !self.found_any {
+                        return Some(Err(self.error(PatchSetParseErrorKind::NoPatchesFound)));
+                    }
+                }
+                result
+            }
         };
 
         result
@@ -174,6 +251,356 @@ fn strip_email_preamble(input: &str) -> &str {
     }
 }
 
+/// Finds the byte offset of the first `diff --git` line in `input`.
+fn find_gitdiff_start(input: &str) -> Option<usize> {
+    let mut offset = 0;
+    for line in input.lines() {
+        if line.starts_with("diff --git ") {
+            return Some(offset);
+        }
+        offset += line.len();
+        offset += line_ending_len(&input[offset..]);
+    }
+    None
+}
+
+/// Git extended header metadata.
+///
+/// Extracted from lines between `diff --git` and `---` (or end of patch).
+/// See [git-diff format documentation](https://git-scm.com/docs/diff-format).
+#[derive(Debug, Default)]
+struct GitHeader<'a> {
+    /// Raw content after "diff --git " prefix.
+    ///
+    /// Only parsed in fallback when `---`/`+++` is absent (mode-only, binary, empty file).
+    diff_git_line: Option<&'a str>,
+    /// Source path from `rename from <path>`.
+    rename_from: Option<&'a str>,
+    /// Destination path from `rename to <path>`.
+    rename_to: Option<&'a str>,
+    /// Source path from `copy from <path>`.
+    copy_from: Option<&'a str>,
+    /// Destination path from `copy to <path>`.
+    copy_to: Option<&'a str>,
+    /// File mode from `old mode <mode>`.
+    old_mode: Option<&'a str>,
+    /// File mode from `new mode <mode>`.
+    new_mode: Option<&'a str>,
+    /// File mode from `new file mode <mode>`.
+    new_file_mode: Option<&'a str>,
+    /// File mode from `deleted file mode <mode>`.
+    deleted_file_mode: Option<&'a str>,
+    /// Whether this is a binary diff with no actual patch content.
+    ///
+    /// Observed `git diff` output (without `--binary`):
+    ///
+    /// ```text
+    /// diff --git a/image.png b/image.png
+    /// new file mode 100644
+    /// index 0000000..7c4530c
+    /// Binary files /dev/null and b/image.png differ
+    /// ```
+    is_binary_marker: bool,
+    /// Whether this is a binary diff with actual patch content.
+    ///
+    /// Observed `git diff --binary` output:
+    ///
+    /// ```text
+    /// diff --git a/image.png b/image.png
+    /// new file mode 100644
+    /// index 0000000..7c4530c
+    /// GIT binary patch
+    /// literal 67
+    /// zcmV-J0KET+...
+    ///
+    /// literal 0
+    /// KcmV+b0RR6000031
+    /// ```
+    is_binary_patch: bool,
+}
+
+impl<'a> GitHeader<'a> {
+    /// Parses git extended headers incrementally from the current position.
+    ///
+    /// Consumes the `diff --git` line and all recognized extended header lines,
+    /// stopping at the first unrecognized line (typically `---`/`+++`/`@@`
+    /// or the next `diff --git`).
+    ///
+    /// Returns the parsed header and the number of bytes consumed.
+    fn parse(input: &'a str) -> (Self, usize) {
+        let mut header = GitHeader::default();
+        let mut consumed = 0;
+
+        for line in input.lines() {
+            if let Some(rest) = line.strip_prefix("diff --git ") {
+                // Only accept the first `diff --git` line.
+                // A second one means we've reached the next patch.
+                if header.diff_git_line.is_some() {
+                    break;
+                }
+                header.diff_git_line = Some(rest);
+            } else if let Some(path) = line.strip_prefix("rename from ") {
+                header.rename_from = Some(path);
+            } else if let Some(path) = line.strip_prefix("rename to ") {
+                header.rename_to = Some(path);
+            } else if let Some(path) = line.strip_prefix("copy from ") {
+                header.copy_from = Some(path);
+            } else if let Some(path) = line.strip_prefix("copy to ") {
+                header.copy_to = Some(path);
+            } else if let Some(mode) = line.strip_prefix("old mode ") {
+                header.old_mode = Some(mode);
+            } else if let Some(mode) = line.strip_prefix("new mode ") {
+                header.new_mode = Some(mode);
+            } else if let Some(mode) = line.strip_prefix("new file mode ") {
+                header.new_file_mode = Some(mode);
+            } else if let Some(mode) = line.strip_prefix("deleted file mode ") {
+                header.deleted_file_mode = Some(mode);
+            } else if line.starts_with("index ")
+                || line.starts_with("similarity index ")
+                || line.starts_with("dissimilarity index ")
+            {
+                // Recognized but nothing to extract.
+            } else if line.starts_with("Binary files ") {
+                header.is_binary_marker = true;
+            } else if line.starts_with("GIT binary patch") {
+                header.is_binary_patch = true;
+            } else {
+                // Unrecognized line: End of extended headers
+                // (typically `---`/`+++`/`@@` or trailing content).
+                break;
+            }
+
+            consumed += line.len();
+            consumed += line_ending_len(&input[consumed..]);
+        }
+
+        (header, consumed)
+    }
+}
+
+/// Determines the file operation from git headers and patch paths.
+fn extract_file_op_gitdiff<'a>(
+    header: &GitHeader<'a>,
+    patch: &Patch<'a, str>,
+) -> Result<FileOperation<'a>, PatchSetParseError> {
+    // Git headers are authoritative for rename/copy
+    if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) {
+        return Ok(FileOperation::Rename {
+            from: Cow::Borrowed(from),
+            to: Cow::Borrowed(to),
+        });
+    }
+    if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) {
+        return Ok(FileOperation::Copy {
+            from: Cow::Borrowed(from),
+            to: Cow::Borrowed(to),
+        });
+    }
+
+    // Try ---/+++ paths first
+    if patch.original().is_some() || patch.modified().is_some() {
+        return extract_file_op_unidiff(patch.original_path(), patch.modified_path());
+    }
+
+    // Fall back to `diff --git <old> <new>` for mode-only and empty file changes
+    let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else {
+        return Err(PatchSetParseErrorKind::InvalidDiffGitPath.into());
+    };
+
+    if header.new_file_mode.is_some() {
+        Ok(FileOperation::Create(modified))
+    } else if header.deleted_file_mode.is_some() {
+        Ok(FileOperation::Delete(original))
+    } else {
+        Ok(FileOperation::Modify { original, modified })
+    }
+}
+
+/// Parses file modes from git extended headers.
+fn parse_file_modes(
+    header: &GitHeader<'_>,
+) -> Result<(Option<FileMode>, Option<FileMode>), PatchSetParseError> {
+    let old_mode = header
+        .old_mode
+        .or(header.deleted_file_mode)
+        .map(str::parse::<FileMode>)
+        .transpose()?;
+    let new_mode = header
+        .new_mode
+        .or(header.new_file_mode)
+        .map(str::parse::<FileMode>)
+        .transpose()?;
+    Ok((old_mode, new_mode))
+}
+
+/// Extracts both old and new paths from `diff --git` line content.
+///
+/// ## Assumption #1: old and new paths are the same
+///
+/// This extraction has one strong assumption:
+/// Beside their prefixes, old and new paths are the same.
+///
+/// From [git-diff format documentation]:
+///
+/// > The `a/` and `b/` filenames are the same unless rename/copy is involved.
+/// > Especially, even for a creation or a deletion, `/dev/null` is not used
+/// > in place of the `a/` or `b/` filenames.
+/// >
+/// > When a rename/copy is involved, file1 and file2 show the name of the
+/// > source file of the rename/copy and the name of the file that the
+/// > rename/copy produces, respectively.
+///
+/// Since rename/copy operations use `rename from/to` and `copy from/to` headers
+/// we have handled earlier in [`extract_file_op_gitdiff`],
+/// (which have no `a/`/`b/` prefix per git spec),
+///
+/// this extraction is only used
+/// * when unified diff headers (`---`/`+++`) are absent
+/// * Only for mode-only and empty file cases
+///
+/// [git-diff format documentation]: https://git-scm.com/docs/diff-format
+///
+/// ## Assumption #2: the longest common path suffix is the shared path
+///
+/// When custom prefixes contain spaces,
+/// multiple splits may produce valid path suffixes.
+///
+/// Example: `src/foo.rs src/foo.rs src/foo.rs src/foo.rs`
+///
+/// Three splits all produce valid path suffixes (contain `/`):
+///
+/// * Position 10
+///   * old path: `src/foo.rs`
+///   * new path: `src/foo.rs src/foo.rs src/foo.rs`
+///   * common suffix: `foo.rs`
+/// * Position 21
+///   * old path: `src/foo.rs src/foo.rs`
+///   * new path: `src/foo.rs src/foo.rs`
+///   * common suffix: `foo.rs src/foo.rs`
+/// * Position 32
+///   * old path: `src/foo.rs src/foo.rs src/foo.rs`
+///   * new path: `src/foo.rs`
+///   * common suffix: `foo.rs`
+///
+/// We observed that `git apply` would pick position 21,
+/// which has the longest path suffix,
+/// hence this heuristic.
+///
+/// ## Supported formats
+///
+/// * `a/<path> b/<path>` (default prefix)
+/// * `<path> <path>` (`git diff --no-prefix`)
+/// * `<src-prefix><path> <dst-prefix><path>` (custom prefix)
+/// * `"<prefix><path>" "<prefix><path>"` (quoted, with escapes)
+/// * Mixed quoted/unquoted
+fn parse_diff_git_path(line: &str) -> Option<(Cow<'_, str>, Cow<'_, str>)> {
+    if line.starts_with('"') || line.ends_with('"') {
+        parse_quoted_diff_git_path(line)
+    } else {
+        parse_unquoted_diff_git_path(line)
+    }
+}
+
+/// See [`parse_diff_git_path`].
+fn parse_unquoted_diff_git_path(line: &str) -> Option<(Cow<'_, str>, Cow<'_, str>)> {
+    let mut best_match = None;
+    let mut longest_path = "";
+
+    for (i, _) in line.match_indices(' ') {
+        let left = &line[..i];
+        let right = &line[i + 1..];
+        if left.is_empty() || right.is_empty() {
+            continue;
+        }
+        // Select split with longest common path suffix (matches Git behavior)
+        if let Some(path) = longest_common_path_suffix(left, right) {
+            if path.len() > longest_path.len() {
+                longest_path = path;
+                best_match = Some((left, right));
+            }
+        }
+    }
+
+    best_match.map(|(l, r)| (Cow::Borrowed(l), Cow::Borrowed(r)))
+}
+
+/// See [`parse_diff_git_path`].
+fn parse_quoted_diff_git_path(line: &str) -> Option<(Cow<'_, str>, Cow<'_, str>)> {
+    let (left_raw, right_raw) = if line.starts_with('"') {
+        // First token is quoted.
+        let bytes = line.as_bytes();
+        let mut i = 1; // skip starting `"`
+        let end = loop {
+            match bytes.get(i)? {
+                b'"' => break i + 1,
+                b'\\' => i += 2,
+                _ => i += 1,
+            }
+        };
+        let (first, rest) = line.split_at(end);
+        let rest = rest.strip_prefix(' ')?;
+        (first, rest)
+    } else if let Some(pos) = line.find(" \"") {
+        // First token is unquoted. The second must be quoted.
+        (&line[..pos], &line[pos + 1..])
+    } else {
+        // Malformed: ends with `"` but no valid quoted path found
+        return None;
+    };
+
+    let left = escaped_filename(left_raw).ok().and_then(|cow| match cow {
+        Cow::Borrowed(b) => std::str::from_utf8(b).ok().map(Cow::Borrowed),
+        Cow::Owned(v) => String::from_utf8(v).ok().map(Cow::Owned),
+    })?;
+    let right = escaped_filename(right_raw).ok().and_then(|cow| match cow {
+        Cow::Borrowed(b) => std::str::from_utf8(b).ok().map(Cow::Borrowed),
+        Cow::Owned(v) => String::from_utf8(v).ok().map(Cow::Owned),
+    })?;
+
+    // Verify both sides share the same path.
+    longest_common_path_suffix(&left, &right)?;
+    Some((left, right))
+}
+
+/// Extracts the longest common path suffix starting at a path component boundary.
+///
+/// Returns `None` if no valid common path exists.
+///
+/// Path component boundary means:
+///
+/// * At `/` character (e.g., `foo/bar.rs` vs `fooo/bar.rs` → `bar.rs`)
+/// * Or the entire string is identical
+fn longest_common_path_suffix<'a>(a: &'a str, b: &'a str) -> Option<&'a str> {
+    if a.is_empty() || b.is_empty() {
+        return None;
+    }
+
+    let suffix_len = a
+        .as_bytes()
+        .iter()
+        .rev()
+        .zip(b.as_bytes().iter().rev())
+        .take_while(|(x, y)| x == y)
+        .count();
+
+    if suffix_len == 0 {
+        return None;
+    }
+
+    // Identical strings
+    if suffix_len == a.len() && a.len() == b.len() {
+        return Some(a);
+    }
+
+    // Find first '/' in suffix and return path after it
+    let suffix_start = a.len() - suffix_len;
+    let suffix = &a[suffix_start..];
+    suffix
+        .split_once('/')
+        .map(|(_, path)| path)
+        .filter(|p| !p.is_empty())
+}
+
 /// Extracts the file operation from a patch based on its header paths.
 pub(crate) fn extract_file_op_unidiff<'a>(
     original: Option<&Cow<'a, str>>,
diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs
index 5bbfd31..9648930 100644
--- a/src/patch_set/tests.rs
+++ b/src/patch_set/tests.rs
@@ -463,3 +463,124 @@ In a hole in the ground there lived a hobbit
         );
     }
 }
+
+mod patchset_gitdiff {
+    use super::*;
+    use crate::patch_set::PatchKind;
+
+    fn parse_gitdiff(input: &str) -> Vec<super::super::FilePatch<'_, str>> {
+        PatchSet::parse(input, ParseOptions::gitdiff())
+            .collect::<Result<Vec<_>, _>>()
+            .unwrap()
+    }
+
+    /// `parse_one` must stop at `diff --git` boundaries so that
+    /// back-to-back patches are split correctly.
+    /// Without this, the second patch's `diff --git` line would be
+    /// swallowed as trailing junk by the first patch's hunk parser.
+    #[test]
+    fn multi_file_stops_at_diff_git_boundary() {
+        let input = "\
+diff --git a/foo b/foo
+--- a/foo
++++ b/foo
+@@ -1 +1 @@
+-old foo
++new foo
+diff --git a/bar b/bar
+--- a/bar
++++ b/bar
+@@ -1 +1 @@
+-old bar
++new bar
+";
+        let patches = parse_gitdiff(input);
+        assert_eq!(patches.len(), 2);
+    }
+
+    #[test]
+    fn pure_rename() {
+        let input = "\
+diff --git a/old.rs b/new.rs
+similarity index 100%
+rename from old.rs
+rename to new.rs
+";
+        let patches = parse_gitdiff(input);
+        assert_eq!(patches.len(), 1);
+        assert_eq!(
+            patches[0].operation(),
+            &FileOperation::Rename {
+                from: "old.rs".into(),
+                to: "new.rs".into(),
+            }
+        );
+    }
+
+    /// Empty file creation has no ---/+++ headers, so the path comes
+    /// from the `diff --git` line and retains the `b/` prefix.
+    /// Callers use `strip_prefix(1)` to remove it.
+    #[test]
+    fn new_empty_file() {
+        let input = "\
+diff --git a/empty b/empty
+new file mode 100644
+index 0000000..e69de29
+";
+        let patches = parse_gitdiff(input);
+        assert_eq!(patches.len(), 1);
+        assert_eq!(
+            patches[0].operation(),
+            &FileOperation::Create("b/empty".into())
+        );
+        match patches[0].patch() {
+            PatchKind::Text(p) => assert!(p.hunks().is_empty()),
+        }
+    }
+
+    #[test]
+    fn rename_then_modify() {
+        // Rename with no hunks followed by a modify with hunks.
+        // Tests that offset advances correctly across both.
+        let input = "\
+diff --git a/old.rs b/new.rs
+similarity index 100%
+rename from old.rs
+rename to new.rs
+diff --git a/foo b/foo
+--- a/foo
++++ b/foo
+@@ -1 +1 @@
+-old
++new
+";
+        let patches = parse_gitdiff(input);
+        assert_eq!(patches.len(), 2);
+        assert!(matches!(
+            patches[0].operation(),
+            FileOperation::Rename { .. }
+        ));
+        assert!(matches!(
+            patches[1].operation(),
+            FileOperation::Modify { .. }
+        ));
+    }
+
+    #[test]
+    fn binary_skip_does_not_stall() {
+        // Binary marker must be skipped and offset must advance
+        // to the next patch without infinite loop.
+        let input = "\
+diff --git a/img.png b/img.png
+Binary files a/img.png and b/img.png differ
+diff --git a/foo b/foo
+--- a/foo
++++ b/foo
+@@ -1 +1 @@
+-old
++new
+";
+        let patches = parse_gitdiff(input);
+        assert_eq!(patches.len(), 1, "binary should be skipped, text parsed");
+    }
+}

From 95836ec24a55ce50266ffd64cf6fbf24aee5f08d Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sat, 11 Apr 2026 12:46:38 -0400
Subject: [PATCH 08/26] test(compat): add git apply compatibility tests

Compat test for also `git apply`.
---
 tests/compat/common.rs                        | 111 ++++++++++++--
 .../compat/git/fail_both_devnull/in/foo.patch |   6 +
 .../format_patch_diff_in_message/in/file.txt  |   1 +
 .../format_patch_diff_in_message/in/foo.patch |  20 +++
 .../format_patch_diff_in_message/out/file.txt |   1 +
 .../in/file.txt                               |   1 +
 .../in/foo.patch                              |  21 +++
 .../out/file.txt                              |   3 +
 .../git/format_patch_preamble/in/file.txt     |   1 +
 .../git/format_patch_preamble/in/foo.patch    |  20 +++
 .../git/format_patch_preamble/out/file.txt    |   1 +
 .../git/format_patch_signature/in/file.txt    |   1 +
 .../git/format_patch_signature/in/foo.patch   |  19 +++
 .../git/format_patch_signature/out/file.txt   |   1 +
 .../compat/git/junk_between_files/in/bar.txt  |   1 +
 .../git/junk_between_files/in/foo.patch       |  17 +++
 .../compat/git/junk_between_files/in/foo.txt  |   1 +
 .../compat/git/junk_between_files/out/bar.txt |   1 +
 .../compat/git/junk_between_files/out/foo.txt |   1 +
 .../compat/git/junk_between_hunks/in/file.txt |   9 ++
 .../git/junk_between_hunks/in/foo.patch       |  15 ++
 .../git/junk_between_hunks/out/file.txt       |   9 ++
 tests/compat/git/mod.rs                       | 138 ++++++++++++++++++
 .../git/nested_diff_signature/in/example.rs   |   1 +
 .../git/nested_diff_signature/in/foo.patch    |  25 ++++
 .../nested_diff_signature/in/mir-test.diff    |  12 ++
 .../git/nested_diff_signature/out/example.rs  |   2 +
 .../nested_diff_signature/out/mir-test.diff   |   0
 .../git/path_ambiguous_suffix/in/foo.patch    |   3 +
 .../out/foo.rs src/foo.rs                     |   0
 .../path_containing_space_b/in/foo b/baz.txt  |   1 +
 .../git/path_containing_space_b/in/foo.patch  |   7 +
 .../path_containing_space_b/out/foo b/baz.txt |   1 +
 tests/compat/git/path_no_prefix/in/file.txt   |   1 +
 tests/compat/git/path_no_prefix/in/foo.patch  |   7 +
 tests/compat/git/path_no_prefix/out/file.txt  |   1 +
 .../git/path_quoted_escapes/in/foo\tbar.txt"  |   1 +
 .../git/path_quoted_escapes/in/foo.patch      |   7 +
 .../git/path_quoted_escapes/out/foo\tbar.txt" |   1 +
 .../git/path_quoted_named_escape/in/foo.patch |   6 +
 .../git/path_quoted_named_escape/out/bel\a"   |   1 +
 .../git/path_quoted_octal_escape/in/foo.patch |   6 +
 .../git/path_quoted_octal_escape/out/tl\033"  |   1 +
 .../git/path_with_spaces/in/foo bar.txt       |   1 +
 .../compat/git/path_with_spaces/in/foo.patch  |   7 +
 .../git/path_with_spaces/out/foo bar.txt      |   1 +
 tests/compat/main.rs                          |   4 +-
 47 files changed, 486 insertions(+), 10 deletions(-)
 create mode 100644 tests/compat/git/fail_both_devnull/in/foo.patch
 create mode 100644 tests/compat/git/format_patch_diff_in_message/in/file.txt
 create mode 100644 tests/compat/git/format_patch_diff_in_message/in/foo.patch
 create mode 100644 tests/compat/git/format_patch_diff_in_message/out/file.txt
 create mode 100644 tests/compat/git/format_patch_multiple_separators/in/file.txt
 create mode 100644 tests/compat/git/format_patch_multiple_separators/in/foo.patch
 create mode 100644 tests/compat/git/format_patch_multiple_separators/out/file.txt
 create mode 100644 tests/compat/git/format_patch_preamble/in/file.txt
 create mode 100644 tests/compat/git/format_patch_preamble/in/foo.patch
 create mode 100644 tests/compat/git/format_patch_preamble/out/file.txt
 create mode 100644 tests/compat/git/format_patch_signature/in/file.txt
 create mode 100644 tests/compat/git/format_patch_signature/in/foo.patch
 create mode 100644 tests/compat/git/format_patch_signature/out/file.txt
 create mode 100644 tests/compat/git/junk_between_files/in/bar.txt
 create mode 100644 tests/compat/git/junk_between_files/in/foo.patch
 create mode 100644 tests/compat/git/junk_between_files/in/foo.txt
 create mode 100644 tests/compat/git/junk_between_files/out/bar.txt
 create mode 100644 tests/compat/git/junk_between_files/out/foo.txt
 create mode 100644 tests/compat/git/junk_between_hunks/in/file.txt
 create mode 100644 tests/compat/git/junk_between_hunks/in/foo.patch
 create mode 100644 tests/compat/git/junk_between_hunks/out/file.txt
 create mode 100644 tests/compat/git/mod.rs
 create mode 100644 tests/compat/git/nested_diff_signature/in/example.rs
 create mode 100644 tests/compat/git/nested_diff_signature/in/foo.patch
 create mode 100644 tests/compat/git/nested_diff_signature/in/mir-test.diff
 create mode 100644 tests/compat/git/nested_diff_signature/out/example.rs
 create mode 100644 tests/compat/git/nested_diff_signature/out/mir-test.diff
 create mode 100644 tests/compat/git/path_ambiguous_suffix/in/foo.patch
 create mode 100644 tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs
 create mode 100644 tests/compat/git/path_containing_space_b/in/foo b/baz.txt
 create mode 100644 tests/compat/git/path_containing_space_b/in/foo.patch
 create mode 100644 tests/compat/git/path_containing_space_b/out/foo b/baz.txt
 create mode 100644 tests/compat/git/path_no_prefix/in/file.txt
 create mode 100644 tests/compat/git/path_no_prefix/in/foo.patch
 create mode 100644 tests/compat/git/path_no_prefix/out/file.txt
 create mode 100644 "tests/compat/git/path_quoted_escapes/in/foo\tbar.txt"
 create mode 100644 tests/compat/git/path_quoted_escapes/in/foo.patch
 create mode 100644 "tests/compat/git/path_quoted_escapes/out/foo\tbar.txt"
 create mode 100644 tests/compat/git/path_quoted_named_escape/in/foo.patch
 create mode 100644 "tests/compat/git/path_quoted_named_escape/out/bel\a"
 create mode 100644 tests/compat/git/path_quoted_octal_escape/in/foo.patch
 create mode 100644 "tests/compat/git/path_quoted_octal_escape/out/tl\033"
 create mode 100644 tests/compat/git/path_with_spaces/in/foo bar.txt
 create mode 100644 tests/compat/git/path_with_spaces/in/foo.patch
 create mode 100644 tests/compat/git/path_with_spaces/out/foo bar.txt

diff --git a/tests/compat/common.rs b/tests/compat/common.rs
index 7bc6f37..117ed04 100644
--- a/tests/compat/common.rs
+++ b/tests/compat/common.rs
@@ -2,16 +2,27 @@
 
 use std::{
     fs,
+    io::Write,
     path::{Path, PathBuf},
-    process::Command,
+    process::{Command, Stdio},
     sync::Once,
 };
 
 use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError};
 
+/// Which external tool to compare against.
+#[derive(Clone, Copy)]
+pub enum CompatMode {
+    /// `git apply` with `ParseOptions::gitdiff()`
+    Git,
+    /// GNU `patch` with `ParseOptions::unidiff()`
+    GnuPatch,
+}
+
 /// A test case with fluent builder API.
 pub struct Case<'a> {
     case_name: &'a str,
+    mode: CompatMode,
     /// Strip level for path prefixes (default: 0)
     strip_level: u32,
     /// Whether diffy is expected to succeed (default: true)
@@ -21,20 +32,37 @@ pub struct Case<'a> {
 }
 
 impl<'a> Case<'a> {
+    /// Create a test case for `git apply` comparison.
+    pub fn git(name: &'a str) -> Self {
+        Self {
+            case_name: name,
+            mode: CompatMode::Git,
+            strip_level: 0,
+            expect_success: true,
+            expect_compat: true,
+        }
+    }
+
     /// Create a test case for GNU patch comparison.
     pub fn gnu_patch(name: &'a str) -> Self {
         Self {
             case_name: name,
+            mode: CompatMode::GnuPatch,
             strip_level: 0,
             expect_success: true,
             expect_compat: true,
         }
     }
 
-    /// Get the case directory path.
+    /// Get the case directory path based on mode.
     fn case_dir(&self) -> PathBuf {
+        let subdir = match self.mode {
+            CompatMode::Git => "git",
+            CompatMode::GnuPatch => "gnu_patch",
+        };
         PathBuf::from(env!("CARGO_MANIFEST_DIR"))
-            .join("tests/compat/gnu_patch")
+            .join("tests/compat")
+            .join(subdir)
             .join(self.case_name)
     }
 
@@ -62,12 +90,19 @@ impl<'a> Case<'a> {
             .unwrap_or_else(|e| panic!("failed to read {}: {e}", patch_path.display()));
 
         let case_name = self.case_name;
+        let prefix = match self.mode {
+            CompatMode::Git => "git",
+            CompatMode::GnuPatch => "gnu",
+        };
         let temp_base = temp_base();
 
-        let diffy_output = temp_base.join(format!("gnu-{case_name}-diffy"));
+        let diffy_output = temp_base.join(format!("{prefix}-{case_name}-diffy"));
         create_output_dir(&diffy_output);
 
-        let opts = ParseOptions::unidiff();
+        let opts = match self.mode {
+            CompatMode::Git => ParseOptions::gitdiff(),
+            CompatMode::GnuPatch => ParseOptions::unidiff(),
+        };
 
         // Apply with diffy
         let diffy_result = apply_diffy(&in_dir, &patch, &diffy_output, opts, self.strip_level);
@@ -81,12 +116,19 @@ impl<'a> Case<'a> {
 
         // In CI mode, also verify external tool behavior
         if is_ci() {
-            let external_output = temp_base.join(format!("gnu-{case_name}-external"));
+            let external_output = temp_base.join(format!("{prefix}-{case_name}-external"));
             create_output_dir(&external_output);
 
-            print_patch_version();
-            let external_result =
-                gnu_patch_apply(&in_dir, &patch_path, &external_output, self.strip_level);
+            let external_result = match self.mode {
+                CompatMode::Git => {
+                    print_git_version();
+                    git_apply(&external_output, &patch, self.strip_level, &in_dir)
+                }
+                CompatMode::GnuPatch => {
+                    print_patch_version();
+                    gnu_patch_apply(&in_dir, &patch_path, &external_output, self.strip_level)
+                }
+            };
 
             // For success cases where both succeed and are expected to be compatible,
             // verify outputs match
@@ -120,6 +162,39 @@ impl<'a> Case<'a> {
 
 // External tool invocations
 
+fn git_apply(
+    output_dir: &Path,
+    patch: &str,
+    strip_level: u32,
+    in_dir: &Path,
+) -> Result<(), String> {
+    copy_input_files(in_dir, output_dir, &["patch"]);
+
+    let mut cmd = Command::new("git");
+    cmd.env("GIT_CONFIG_NOSYSTEM", "1");
+    cmd.env("GIT_CONFIG_GLOBAL", "/dev/null");
+    cmd.current_dir(output_dir);
+    cmd.args(["apply", &format!("-p{strip_level}"), "-"]);
+    cmd.stdin(Stdio::piped());
+    cmd.stdout(Stdio::piped());
+    cmd.stderr(Stdio::piped());
+
+    let mut child = cmd.spawn().expect("failed to spawn git apply");
+    child
+        .stdin
+        .as_mut()
+        .unwrap()
+        .write_all(patch.as_bytes())
+        .unwrap();
+
+    let output = child.wait_with_output().unwrap();
+    if output.status.success() {
+        Ok(())
+    } else {
+        Err(String::from_utf8_lossy(&output.stderr).to_string())
+    }
+}
+
 fn gnu_patch_apply(
     in_dir: &Path,
     patch_path: &Path,
@@ -149,6 +224,24 @@ fn gnu_patch_apply(
     }
 }
 
+fn print_git_version() {
+    static ONCE: Once = Once::new();
+    ONCE.call_once(|| {
+        let output = Command::new("git").arg("--version").output();
+        match output {
+            Ok(o) if o.status.success() => {
+                let version = String::from_utf8_lossy(&o.stdout);
+                eprintln!(
+                    "git version: {}",
+                    version.lines().next().unwrap_or("unknown")
+                );
+            }
+            Ok(o) => eprintln!("git --version failed: {}", o.status),
+            Err(e) => eprintln!("git command not found: {e}"),
+        }
+    });
+}
+
 fn print_patch_version() {
     static ONCE: Once = Once::new();
     ONCE.call_once(|| {
diff --git a/tests/compat/git/fail_both_devnull/in/foo.patch b/tests/compat/git/fail_both_devnull/in/foo.patch
new file mode 100644
index 0000000..26b2827
--- /dev/null
+++ b/tests/compat/git/fail_both_devnull/in/foo.patch
@@ -0,0 +1,6 @@
+diff --git a/foo b/foo
+--- /dev/null
++++ /dev/null
+@@ -1 +1 @@
+-old
++new
diff --git a/tests/compat/git/format_patch_diff_in_message/in/file.txt b/tests/compat/git/format_patch_diff_in_message/in/file.txt
new file mode 100644
index 0000000..3367afd
--- /dev/null
+++ b/tests/compat/git/format_patch_diff_in_message/in/file.txt
@@ -0,0 +1 @@
+old
diff --git a/tests/compat/git/format_patch_diff_in_message/in/foo.patch b/tests/compat/git/format_patch_diff_in_message/in/foo.patch
new file mode 100644
index 0000000..ce33371
--- /dev/null
+++ b/tests/compat/git/format_patch_diff_in_message/in/foo.patch
@@ -0,0 +1,20 @@
+From 8a14b135fe7ba10bab09a77c4a687faaf1d92a26 Mon Sep 17 00:00:00 2001
+From: Test <test@example.com>
+Date: Wed, 1 Jan 2020 00:00:00 +0000
+Subject: [PATCH] Fix diff --git parsing bug
+
+The line `diff --git a/x b/x` in messages was incorrectly parsed.
+---
+ file.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/file.txt b/file.txt
+index 3367afd..3e75765 100644
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1 @@
+-old
++new
+-- 
+2.52.0
+
diff --git a/tests/compat/git/format_patch_diff_in_message/out/file.txt b/tests/compat/git/format_patch_diff_in_message/out/file.txt
new file mode 100644
index 0000000..3e75765
--- /dev/null
+++ b/tests/compat/git/format_patch_diff_in_message/out/file.txt
@@ -0,0 +1 @@
+new
diff --git a/tests/compat/git/format_patch_multiple_separators/in/file.txt b/tests/compat/git/format_patch_multiple_separators/in/file.txt
new file mode 100644
index 0000000..3367afd
--- /dev/null
+++ b/tests/compat/git/format_patch_multiple_separators/in/file.txt
@@ -0,0 +1 @@
+old
diff --git a/tests/compat/git/format_patch_multiple_separators/in/foo.patch b/tests/compat/git/format_patch_multiple_separators/in/foo.patch
new file mode 100644
index 0000000..a0da5b9
--- /dev/null
+++ b/tests/compat/git/format_patch_multiple_separators/in/foo.patch
@@ -0,0 +1,21 @@
+From 6bfbbfa49a16bb8173145a933fe5ad918ad48a31 Mon Sep 17 00:00:00 2001
+From: Test <test@example.com>
+Date: Wed, 1 Jan 2020 00:00:00 +0000
+Subject: [PATCH] Add content with --- markers
+
+---
+ file.txt | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/file.txt b/file.txt
+index 3367afd..c4d4ea8 100644
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1,3 @@
+-old
++---
++new
++---
+-- 
+2.52.0
+
diff --git a/tests/compat/git/format_patch_multiple_separators/out/file.txt b/tests/compat/git/format_patch_multiple_separators/out/file.txt
new file mode 100644
index 0000000..c4d4ea8
--- /dev/null
+++ b/tests/compat/git/format_patch_multiple_separators/out/file.txt
@@ -0,0 +1,3 @@
+---
+new
+---
diff --git a/tests/compat/git/format_patch_preamble/in/file.txt b/tests/compat/git/format_patch_preamble/in/file.txt
new file mode 100644
index 0000000..3367afd
--- /dev/null
+++ b/tests/compat/git/format_patch_preamble/in/file.txt
@@ -0,0 +1 @@
+old
diff --git a/tests/compat/git/format_patch_preamble/in/foo.patch b/tests/compat/git/format_patch_preamble/in/foo.patch
new file mode 100644
index 0000000..9d90425
--- /dev/null
+++ b/tests/compat/git/format_patch_preamble/in/foo.patch
@@ -0,0 +1,20 @@
+From ddbc9053359329dd016ed89f0d6e460b3b8ff5e3 Mon Sep 17 00:00:00 2001
+From: Test <test@example.com>
+Date: Wed, 1 Jan 2020 00:00:00 +0000
+Subject: [PATCH] Add new content
+
+This is the commit body.
+---
+ file.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/file.txt b/file.txt
+index 3367afd..3e75765 100644
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1 @@
+-old
++new
+-- 
+2.52.0
+
diff --git a/tests/compat/git/format_patch_preamble/out/file.txt b/tests/compat/git/format_patch_preamble/out/file.txt
new file mode 100644
index 0000000..3e75765
--- /dev/null
+++ b/tests/compat/git/format_patch_preamble/out/file.txt
@@ -0,0 +1 @@
+new
diff --git a/tests/compat/git/format_patch_signature/in/file.txt b/tests/compat/git/format_patch_signature/in/file.txt
new file mode 100644
index 0000000..3367afd
--- /dev/null
+++ b/tests/compat/git/format_patch_signature/in/file.txt
@@ -0,0 +1 @@
+old
diff --git a/tests/compat/git/format_patch_signature/in/foo.patch b/tests/compat/git/format_patch_signature/in/foo.patch
new file mode 100644
index 0000000..7614287
--- /dev/null
+++ b/tests/compat/git/format_patch_signature/in/foo.patch
@@ -0,0 +1,19 @@
+From b3bb3125eff3d2648f15af2a6e0cdcdf6ad8fce1 Mon Sep 17 00:00:00 2001
+From: Test <test@example.com>
+Date: Wed, 1 Jan 2020 00:00:00 +0000
+Subject: [PATCH] modify
+
+---
+ file.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/file.txt b/file.txt
+index 3367afd..3e75765 100644
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1 @@
+-old
++new
+-- 
+2.52.0
+
diff --git a/tests/compat/git/format_patch_signature/out/file.txt b/tests/compat/git/format_patch_signature/out/file.txt
new file mode 100644
index 0000000..3e75765
--- /dev/null
+++ b/tests/compat/git/format_patch_signature/out/file.txt
@@ -0,0 +1 @@
+new
diff --git a/tests/compat/git/junk_between_files/in/bar.txt b/tests/compat/git/junk_between_files/in/bar.txt
new file mode 100644
index 0000000..601d8ee
--- /dev/null
+++ b/tests/compat/git/junk_between_files/in/bar.txt
@@ -0,0 +1 @@
+bar line1
diff --git a/tests/compat/git/junk_between_files/in/foo.patch b/tests/compat/git/junk_between_files/in/foo.patch
new file mode 100644
index 0000000..dbc5939
--- /dev/null
+++ b/tests/compat/git/junk_between_files/in/foo.patch
@@ -0,0 +1,17 @@
+diff --git a/foo.txt b/foo.txt
+index 1234567..89abcdef 100644
+--- a/foo.txt
++++ b/foo.txt
+@@ -1 +1 @@
+-foo line1
++FOO LINE1
+JUNK BETWEEN FILES!!!!
+This preamble text should be ignored
+by both git apply and diffy
+diff --git a/bar.txt b/bar.txt
+index 1234567..89abcdef 100644
+--- a/bar.txt
++++ b/bar.txt
+@@ -1 +1 @@
+-bar line1
++BAR LINE1
diff --git a/tests/compat/git/junk_between_files/in/foo.txt b/tests/compat/git/junk_between_files/in/foo.txt
new file mode 100644
index 0000000..b11358e
--- /dev/null
+++ b/tests/compat/git/junk_between_files/in/foo.txt
@@ -0,0 +1 @@
+foo line1
diff --git a/tests/compat/git/junk_between_files/out/bar.txt b/tests/compat/git/junk_between_files/out/bar.txt
new file mode 100644
index 0000000..76c036d
--- /dev/null
+++ b/tests/compat/git/junk_between_files/out/bar.txt
@@ -0,0 +1 @@
+BAR LINE1
diff --git a/tests/compat/git/junk_between_files/out/foo.txt b/tests/compat/git/junk_between_files/out/foo.txt
new file mode 100644
index 0000000..787bc66
--- /dev/null
+++ b/tests/compat/git/junk_between_files/out/foo.txt
@@ -0,0 +1 @@
+FOO LINE1
diff --git a/tests/compat/git/junk_between_hunks/in/file.txt b/tests/compat/git/junk_between_hunks/in/file.txt
new file mode 100644
index 0000000..822aed3
--- /dev/null
+++ b/tests/compat/git/junk_between_hunks/in/file.txt
@@ -0,0 +1,9 @@
+line1
+line2
+line3
+line4
+line5
+line6
+line7
+line8
+line9
diff --git a/tests/compat/git/junk_between_hunks/in/foo.patch b/tests/compat/git/junk_between_hunks/in/foo.patch
new file mode 100644
index 0000000..242e959
--- /dev/null
+++ b/tests/compat/git/junk_between_hunks/in/foo.patch
@@ -0,0 +1,15 @@
+diff --git a/file.txt b/file.txt
+index 1234567..89abcdef 100644
+--- a/file.txt
++++ b/file.txt
+@@ -1,3 +1,3 @@
+-line1
++LINE1
+ line2
+ line3
+JUNK BETWEEN HUNKS
+@@ -7,3 +7,3 @@
+ line7
+-line8
++LINE8
+ line9
diff --git a/tests/compat/git/junk_between_hunks/out/file.txt b/tests/compat/git/junk_between_hunks/out/file.txt
new file mode 100644
index 0000000..2e5e454
--- /dev/null
+++ b/tests/compat/git/junk_between_hunks/out/file.txt
@@ -0,0 +1,9 @@
+LINE1
+line2
+line3
+line4
+line5
+line6
+line7
+line8
+line9
diff --git a/tests/compat/git/mod.rs b/tests/compat/git/mod.rs
new file mode 100644
index 0000000..e7dc99b
--- /dev/null
+++ b/tests/compat/git/mod.rs
@@ -0,0 +1,138 @@
+//! Git compatibility tests. See [`crate`] for test structure and usage.
+//!
+//! Focus areas:
+//!
+//! - `diff --git` path parsing edge cases (quotes, spaces, ambiguous prefixes)
+//! - `git format-patch` email format (preamble/signature stripping)
+//! - Agreement between diffy and `git apply`
+
+use crate::common::Case;
+
+#[test]
+fn path_no_prefix() {
+    Case::git("path_no_prefix").run();
+}
+
+#[test]
+fn path_quoted_escapes() {
+    Case::git("path_quoted_escapes").strip(1).run();
+}
+
+// Git uses C-style named escapes (\a, \b, \f, \v) for certain control
+// characters in quoted filenames. Both `git apply` and GNU patch decode
+// these correctly.
+//
+// Observed with git 2.53.0:
+//   $ printf 'x' > "$(printf 'bel\a')" && git add -A
+//   $ git diff --cached | grep '+++'
+//   +++ "b/bel\a"
+//
+// diffy now decodes these correctly.
+#[test]
+fn path_quoted_named_escape() {
+    Case::git("path_quoted_named_escape").strip(1).run();
+}
+
+// Git uses 3-digit octal escapes (\000-\377) for bytes that don't have
+// a named escape. Both `git apply` and GNU patch decode these correctly.
+//
+// Observed with git 2.53.0:
+//   $ printf 'x' > "$(printf 'tl\033')" && git add -A
+//   $ git diff --cached | grep '+++'
+//   +++ "b/tl\033"
+//
+// Found via full-history replay test against llvm/llvm-project
+// (commits 17af06ba..229c95ab, 6c031780..0683a1e5).
+#[test]
+fn path_quoted_octal_escape() {
+    Case::git("path_quoted_octal_escape").strip(1).run();
+}
+
+#[test]
+fn path_with_spaces() {
+    Case::git("path_with_spaces").strip(1).run();
+}
+
+#[test]
+fn path_containing_space_b() {
+    Case::git("path_containing_space_b").strip(1).run();
+}
+
+#[test]
+fn format_patch_preamble() {
+    // Ambiguous: where does preamble end? First `\n---\n` - verify matches git
+    Case::git("format_patch_preamble").strip(1).run();
+}
+
+#[test]
+fn format_patch_diff_in_message() {
+    // `diff --git` in commit message must NOT trigger early parsing
+    Case::git("format_patch_diff_in_message").strip(1).run();
+}
+
+#[test]
+fn format_patch_multiple_separators() {
+    // Git uses first `\n---\n` as separator (observed git mailinfo behavior)
+    Case::git("format_patch_multiple_separators").strip(1).run();
+}
+
+#[test]
+fn format_patch_signature() {
+    // Ambiguous: `\n-- \n` could appear in patch content - verify matches git
+    Case::git("format_patch_signature").strip(1).run();
+}
+
+#[test]
+fn nested_diff_signature() {
+    // Patch that deletes a diff file containing `-- ` patterns within its content,
+    // followed by a real email signature at the end.
+    //
+    // Tests that we correctly distinguish between:
+    // - `-- ` appearing as patch content (from inner diff's empty context lines)
+    // - `-- ` appearing as the actual email signature separator
+    //
+    // Both git apply and GNU patch handle this correctly.
+    Case::git("nested_diff_signature").strip(1).run();
+}
+
+#[test]
+fn path_ambiguous_suffix() {
+    // Multiple valid splits in `diff --git` line; algorithm picks longest common suffix.
+    // Tests the pathological case from parse.rs comments where custom prefix
+    // creates `src/foo.rs src/foo.rs src/foo.rs src/foo.rs` - verify matches git.
+    Case::git("path_ambiguous_suffix").strip(1).run();
+}
+
+// Both --- and +++ point to /dev/null.
+// git apply rejects: "dev/null: No such file or directory"
+#[test]
+fn fail_both_devnull() {
+    Case::git("fail_both_devnull")
+        .strip(1)
+        .expect_success(false)
+        .run();
+}
+
+// Single-file patch with junk between hunks.
+//
+// - git apply: errors ("patch fragment without header")
+// - diffy: succeeds, ignores trailing junk (matches GNU patch behavior)
+#[test]
+fn junk_between_hunks() {
+    Case::git("junk_between_hunks")
+        .strip(1)
+        .expect_compat(false)
+        .run();
+}
+
+// Multi-file patch with junk/preamble text between different files.
+//
+// git apply behavior: Ignores content between `diff --git` boundaries.
+// In GitDiff mode, splitting occurs at `diff --git`, so junk between
+// files becomes trailing content of the previous chunk (harmless).
+//
+// This is different from junk between HUNKS of the same file (which fails).
+#[test]
+fn junk_between_files() {
+    Case::git("junk_between_files").strip(1).run();
+}
diff --git a/tests/compat/git/nested_diff_signature/in/example.rs b/tests/compat/git/nested_diff_signature/in/example.rs
new file mode 100644
index 0000000..8f3b7ef
--- /dev/null
+++ b/tests/compat/git/nested_diff_signature/in/example.rs
@@ -0,0 +1 @@
+fn foo() {}
diff --git a/tests/compat/git/nested_diff_signature/in/foo.patch b/tests/compat/git/nested_diff_signature/in/foo.patch
new file mode 100644
index 0000000..5d876c6
--- /dev/null
+++ b/tests/compat/git/nested_diff_signature/in/foo.patch
@@ -0,0 +1,25 @@
+diff --git a/mir-test.diff b/mir-test.diff
+deleted file mode 100644
+index 98012d7..0000000
+--- a/mir-test.diff
++++ /dev/null
+@@ -1,12 +0,0 @@
+-- // MIR before
+-+ // MIR after
+-
+-  fn opt() {
+-      bb0: {
+--         nop;
+--     }
+-- 
+--     bb1: {
+--         nop;
+-      }
+-  }
+diff --git a/example.rs b/example.rs
+index 8f3b7ef..2a40712 100644
+--- a/example.rs
++++ b/example.rs
+@@ -1 +1,2 @@
+ fn foo() {}
++fn bar() {}
diff --git a/tests/compat/git/nested_diff_signature/in/mir-test.diff b/tests/compat/git/nested_diff_signature/in/mir-test.diff
new file mode 100644
index 0000000..98012d7
--- /dev/null
+++ b/tests/compat/git/nested_diff_signature/in/mir-test.diff
@@ -0,0 +1,12 @@
+- // MIR before
++ // MIR after
+
+  fn opt() {
+      bb0: {
+-         nop;
+-     }
+- 
+-     bb1: {
+-         nop;
+      }
+  }
diff --git a/tests/compat/git/nested_diff_signature/out/example.rs b/tests/compat/git/nested_diff_signature/out/example.rs
new file mode 100644
index 0000000..2a40712
--- /dev/null
+++ b/tests/compat/git/nested_diff_signature/out/example.rs
@@ -0,0 +1,2 @@
+fn foo() {}
+fn bar() {}
diff --git a/tests/compat/git/nested_diff_signature/out/mir-test.diff b/tests/compat/git/nested_diff_signature/out/mir-test.diff
new file mode 100644
index 0000000..e69de29
diff --git a/tests/compat/git/path_ambiguous_suffix/in/foo.patch b/tests/compat/git/path_ambiguous_suffix/in/foo.patch
new file mode 100644
index 0000000..a6815eb
--- /dev/null
+++ b/tests/compat/git/path_ambiguous_suffix/in/foo.patch
@@ -0,0 +1,3 @@
+diff --git src/foo.rs src/foo.rs src/foo.rs src/foo.rs
+new file mode 100644
+index 0000000..e69de29
diff --git a/tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs b/tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs
new file mode 100644
index 0000000..e69de29
diff --git a/tests/compat/git/path_containing_space_b/in/foo b/baz.txt b/tests/compat/git/path_containing_space_b/in/foo b/baz.txt
new file mode 100644
index 0000000..3367afd
--- /dev/null
+++ b/tests/compat/git/path_containing_space_b/in/foo b/baz.txt	
@@ -0,0 +1 @@
+old
diff --git a/tests/compat/git/path_containing_space_b/in/foo.patch b/tests/compat/git/path_containing_space_b/in/foo.patch
new file mode 100644
index 0000000..15c6fca
--- /dev/null
+++ b/tests/compat/git/path_containing_space_b/in/foo.patch
@@ -0,0 +1,7 @@
+diff --git a/foo b/baz.txt b/foo b/baz.txt
+index 3367afd..3e75765 100644
+--- a/foo b/baz.txt
++++ b/foo b/baz.txt
+@@ -1 +1 @@
+-old
++new
diff --git a/tests/compat/git/path_containing_space_b/out/foo b/baz.txt b/tests/compat/git/path_containing_space_b/out/foo b/baz.txt
new file mode 100644
index 0000000..3e75765
--- /dev/null
+++ b/tests/compat/git/path_containing_space_b/out/foo b/baz.txt	
@@ -0,0 +1 @@
+new
diff --git a/tests/compat/git/path_no_prefix/in/file.txt b/tests/compat/git/path_no_prefix/in/file.txt
new file mode 100644
index 0000000..3367afd
--- /dev/null
+++ b/tests/compat/git/path_no_prefix/in/file.txt
@@ -0,0 +1 @@
+old
diff --git a/tests/compat/git/path_no_prefix/in/foo.patch b/tests/compat/git/path_no_prefix/in/foo.patch
new file mode 100644
index 0000000..5e2a9f8
--- /dev/null
+++ b/tests/compat/git/path_no_prefix/in/foo.patch
@@ -0,0 +1,7 @@
+diff --git file.txt file.txt
+index 3367afd..3e75765 100644
+--- file.txt
++++ file.txt
+@@ -1 +1 @@
+-old
++new
diff --git a/tests/compat/git/path_no_prefix/out/file.txt b/tests/compat/git/path_no_prefix/out/file.txt
new file mode 100644
index 0000000..3e75765
--- /dev/null
+++ b/tests/compat/git/path_no_prefix/out/file.txt
@@ -0,0 +1 @@
+new
diff --git "a/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" "b/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt"
new file mode 100644
index 0000000..3367afd
--- /dev/null
+++ "b/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt"
@@ -0,0 +1 @@
+old
diff --git a/tests/compat/git/path_quoted_escapes/in/foo.patch b/tests/compat/git/path_quoted_escapes/in/foo.patch
new file mode 100644
index 0000000..2669223
--- /dev/null
+++ b/tests/compat/git/path_quoted_escapes/in/foo.patch
@@ -0,0 +1,7 @@
+diff --git "a/foo\tbar.txt" "b/foo\tbar.txt"
+index 3367afd..3e75765 100644
+--- "a/foo\tbar.txt"
++++ "b/foo\tbar.txt"
+@@ -1 +1 @@
+-old
++new
diff --git "a/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" "b/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt"
new file mode 100644
index 0000000..3e75765
--- /dev/null
+++ "b/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt"
@@ -0,0 +1 @@
+new
diff --git a/tests/compat/git/path_quoted_named_escape/in/foo.patch b/tests/compat/git/path_quoted_named_escape/in/foo.patch
new file mode 100644
index 0000000..cfd3f56
--- /dev/null
+++ b/tests/compat/git/path_quoted_named_escape/in/foo.patch
@@ -0,0 +1,6 @@
+diff --git "a/bel\a" "b/bel\a"
+new file mode 100644
+--- /dev/null
++++ "b/bel\a"
+@@ -0,0 +1 @@
++hello
diff --git "a/tests/compat/git/path_quoted_named_escape/out/bel\a" "b/tests/compat/git/path_quoted_named_escape/out/bel\a"
new file mode 100644
index 0000000..ce01362
--- /dev/null
+++ "b/tests/compat/git/path_quoted_named_escape/out/bel\a"
@@ -0,0 +1 @@
+hello
diff --git a/tests/compat/git/path_quoted_octal_escape/in/foo.patch b/tests/compat/git/path_quoted_octal_escape/in/foo.patch
new file mode 100644
index 0000000..5dda9ec
--- /dev/null
+++ b/tests/compat/git/path_quoted_octal_escape/in/foo.patch
@@ -0,0 +1,6 @@
+diff --git "a/tl\033" "b/tl\033"
+new file mode 100644
+--- /dev/null
++++ "b/tl\033"
+@@ -0,0 +1 @@
++hello
diff --git "a/tests/compat/git/path_quoted_octal_escape/out/tl\033" "b/tests/compat/git/path_quoted_octal_escape/out/tl\033"
new file mode 100644
index 0000000..ce01362
--- /dev/null
+++ "b/tests/compat/git/path_quoted_octal_escape/out/tl\033"
@@ -0,0 +1 @@
+hello
diff --git a/tests/compat/git/path_with_spaces/in/foo bar.txt b/tests/compat/git/path_with_spaces/in/foo bar.txt
new file mode 100644
index 0000000..3367afd
--- /dev/null
+++ b/tests/compat/git/path_with_spaces/in/foo bar.txt	
@@ -0,0 +1 @@
+old
diff --git a/tests/compat/git/path_with_spaces/in/foo.patch b/tests/compat/git/path_with_spaces/in/foo.patch
new file mode 100644
index 0000000..b3d1d46
--- /dev/null
+++ b/tests/compat/git/path_with_spaces/in/foo.patch
@@ -0,0 +1,7 @@
+diff --git a/foo bar.txt b/foo bar.txt
+index 3367afd..3e75765 100644
+--- a/foo bar.txt
++++ b/foo bar.txt
+@@ -1 +1 @@
+-old
++new
diff --git a/tests/compat/git/path_with_spaces/out/foo bar.txt b/tests/compat/git/path_with_spaces/out/foo bar.txt
new file mode 100644
index 0000000..3e75765
--- /dev/null
+++ b/tests/compat/git/path_with_spaces/out/foo bar.txt	
@@ -0,0 +1 @@
+new
diff --git a/tests/compat/main.rs b/tests/compat/main.rs
index 8faf9ea..e35ed07 100644
--- a/tests/compat/main.rs
+++ b/tests/compat/main.rs
@@ -39,9 +39,11 @@
 //!
 //! 1. Create `case_name/in/` with input file(s) and `foo.patch`
 //! 2. Run `SNAPSHOTS=overwrite cargo test --test compat` to generate `out/`
-//! 3. Add `#[test] fn case_name() { Case::gnu_patch(...).run(); }` in the module
+//! 3. Add `#[test] fn case_name() { Case::{gnu_patch,git}(...).run(); }` in the module
 //!
 //! For failure tests, use `.expect_success(false)` and skip step 2.
+//! For intentional compat divergence, use `.expect_compat(false)`.
 
 mod common;
+mod git;
 mod gnu_patch;

From ff392af700443c665ebdf62d85758af931bdb28c Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sat, 11 Apr 2026 13:13:10 -0400
Subject: [PATCH 09/26] test(replay): add gitdiff mode support

Unlike unidiff,
gitdiff produces patches for empty file creations/deletions
(`0\t0` in numstat)
because they carry `diff --git` + extended headers even without hunks.

Binary files (`-\t-\t`) are skipped in gitdiff mode for now.
---
 tests/replay.rs | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/tests/replay.rs b/tests/replay.rs
index 7eaa56b..1d4e731 100644
--- a/tests/replay.rs
+++ b/tests/replay.rs
@@ -18,8 +18,7 @@
 //!   * A range (e.g., `abc123..def456`) for a specific commit range
 //!
 //!   Defaults to 200. Use `0` to verify entire history.
-//! * `DIFFY_TEST_PARSE_MODE`: Parse mode to use.
-//!   Currently only `unidiff` is supported.
+//! * `DIFFY_TEST_PARSE_MODE`: Parse mode to use (`unidiff` or `gitdiff`).
 //!   Defaults to `unidiff`.
 //!
 //! ## Requirements
@@ -161,12 +160,14 @@ impl CatFile {
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 enum TestMode {
     UniDiff,
+    GitDiff,
 }
 
 impl From<TestMode> for ParseOptions {
     fn from(value: TestMode) -> Self {
         match value {
             TestMode::UniDiff => ParseOptions::unidiff(),
+            TestMode::GitDiff => ParseOptions::gitdiff(),
         }
     }
 }
@@ -228,7 +229,8 @@ fn test_mode() -> TestMode {
     };
     match val.trim().to_lowercase().as_str() {
         "unidiff" => TestMode::UniDiff,
-        _ => panic!("invalid DIFFY_TEST_PARSE_MODE='{val}': expected 'unidiff'"),
+        "gitdiff" => TestMode::GitDiff,
+        _ => panic!("invalid DIFFY_TEST_PARSE_MODE='{val}': expected 'unidiff' or 'gitdiff'"),
     }
 }
 
@@ -323,8 +325,13 @@ fn process_commit(
 
     // UniDiff format cannot express pure renames (no ---/+++ headers).
     // Use `--no-renames` to represent them as delete + create instead.
+    // GitDiff mode handles renames via extended headers natively.
     let diff_output = match mode {
         TestMode::UniDiff => git(repo, &["diff", "--no-renames", parent, child]),
+        // TODO: pass `--binary` once binary patch support lands,
+        // so binary files get actual delta/literal data instead of
+        // "Binary files differ" markers.
+        TestMode::GitDiff => git(repo, &["diff", parent, child]),
     };
 
     if diff_output.is_empty() {
@@ -373,6 +380,30 @@ fn process_commit(
             }
             text_files + type_changes
         }
+        TestMode::GitDiff => {
+            // Can't use `--numstat` for GitDiff: it shows `-\t-\t` for both
+            // actual binary diffs AND pure binary renames (100% similarity).
+            // Parser correctly handles pure renames (rename headers, no binary content).
+            //
+            // Use `--raw` for total count, subtract actual binary markers from diff.
+            //
+            // TODO: once `--binary` is passed above, count ALL `--raw`
+            // entries — every file will have patch data (delta, literal, or text).
+            let raw = git(repo, &["diff", "--raw", parent, child]);
+            let (mut total, mut type_changes) = (0, 0);
+            for line in raw.lines().filter(|l| !l.is_empty()) {
+                total += 1;
+                if is_type_change(line) {
+                    type_changes += 1;
+                }
+            }
+            let binary = diff_output
+                .lines()
+                .filter(|l| l.starts_with("Binary files ") || l.starts_with("GIT binary patch"))
+                .count();
+            skipped += binary;
+            total - binary + type_changes
+        }
     };
 
     if expected_file_count == 0 {
@@ -518,6 +549,7 @@ fn replay() {
         .unwrap_or_else(|| ".".to_string());
     let mode_name = match mode {
         TestMode::UniDiff => "unidiff",
+        TestMode::GitDiff => "gitdiff",
     };
 
     // Shared state for progress reporting

From e5895d96bcca079a625dbe644a6110f1015a8ae8 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 12:59:07 -0400
Subject: [PATCH 10/26] chore(ci): run gitdiff also in replay workflow

---
 .github/workflows/replay.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/replay.yml b/.github/workflows/replay.yml
index a9f8f99..08384d6 100644
--- a/.github/workflows/replay.yml
+++ b/.github/workflows/replay.yml
@@ -44,7 +44,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        parse_mode: [unidiff]
+        parse_mode: [unidiff, gitdiff]
     name: ${{ inputs.name && matrix.parse_mode || format('{0} ({1}, {2})', inputs.repo_url, matrix.parse_mode, inputs.commits) }}
     steps:
       - uses: actions/checkout@v6

From f1eb3c68c5e33776dc0dc55ddbf152a8752d854a Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 18:46:08 -0400
Subject: [PATCH 11/26] feat(binary): binary patch types and parser

* Added types representing both literal and delta Git binary patches
* Added a parser for the `GIT binary patch` format.

This doesn't include the patch application
(which will be added in later commits)

The implementation is based on

* Specification from <https://diffx.org/spec/binary-diffs.html>
* Behavior observation of Git CLI
---
 src/binary/mod.rs | 352 ++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs        |   1 +
 2 files changed, 353 insertions(+)
 create mode 100644 src/binary/mod.rs

diff --git a/src/binary/mod.rs b/src/binary/mod.rs
new file mode 100644
index 0000000..696f0d6
--- /dev/null
+++ b/src/binary/mod.rs
@@ -0,0 +1,352 @@
+//! Git binary diffs support.
+//!
+//! This module provides parsing and decoding for Git's binary diff format,
+//! as generated by `git diff --binary` or `git format-patch --binary`.
+//!
+//! Based on [DiffX Binary Diffs specification](https://diffx.org/spec/binary-diffs.html).
+
+use std::{fmt, ops::Range};
+
+/// The type of a binary patch block.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BinaryBlockKind {
+    /// [Literal](https://diffx.org/spec/binary-diffs.html#git-literal-binary-diffs):
+    /// contains the full file content, zlib-compressed and Base85-encoded.
+    Literal,
+    /// [Delta](https://diffx.org/spec/binary-diffs.html#git-delta-binary-diffs):
+    /// contains delta instructions to transform one file into another.
+    Delta,
+}
+
+/// A single block in a binary patch.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct BinaryBlock<'a> {
+    /// The type of this block (literal content or delta instructions).
+    pub kind: BinaryBlockKind,
+    /// The encoded data.
+    pub data: BinaryData<'a>,
+}
+
+/// A parsed binary patch.
+///
+/// A binary patch contains encoded binary data that can be decoded
+/// to recover the original and modified file contents.
+///
+/// Git may use different encodings for each direction:
+///
+/// - `literal`: full file content
+/// - `delta`: instructions to transform one file into another
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum BinaryPatch<'a> {
+    /// A full binary patch with forward and reverse data.
+    ///
+    /// The forward block transforms original -> modified.
+    /// The reverse block transforms modified -> original.
+    ///
+    /// Each block can independently be either `literal` or `delta`.
+    Full {
+        /// Forward transformation (original -> modified).
+        forward: BinaryBlock<'a>,
+        /// Reverse transformation (modified -> original).
+        reverse: BinaryBlock<'a>,
+    },
+    /// A Git binary diff marker.
+    ///
+    /// This represents the `Binary files a/path and b/path differ` case,
+    /// where git detected a binary change but didn't include the actual data.
+    Marker,
+}
+
+/// Represents a single binary payload in a Git binary diff.
+///
+/// For example, the following patch block
+///
+/// * is parsed as `BinaryData { size: 10, data: "UcmV+l0QLU>0RjUA1qKHQ2>\`DEE&u=k" }`
+/// * The line starts with a length indicator (`U` = 21 decoded bytes)
+///
+///
+/// ```text
+/// literal 10
+/// UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k
+/// ```
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct BinaryData<'a> {
+    /// Uncompressed size in bytes.
+    pub size: u64,
+    /// Raw Base85 lines with length indicators.
+    pub data: &'a str,
+}
+
+/// Error type for binary patch operations.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct BinaryPatchParseError {
+    pub(crate) kind: BinaryPatchParseErrorKind,
+    span: Option<Range<usize>>,
+}
+
+impl BinaryPatchParseError {
+    /// Creates a new error with the given kind and span.
+    pub(crate) fn new(kind: BinaryPatchParseErrorKind, span: Range<usize>) -> Self {
+        Self {
+            kind,
+            span: Some(span),
+        }
+    }
+
+    /// Returns the byte range in the input where the error occurred.
+    pub fn span(&self) -> Option<Range<usize>> {
+        self.span.clone()
+    }
+}
+
+impl fmt::Display for BinaryPatchParseError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if let Some(span) = &self.span {
+            write!(
+                f,
+                "error parsing binary patch at byte {}: {}",
+                span.start, self.kind
+            )
+        } else {
+            write!(f, "error parsing binary patch: {}", self.kind)
+        }
+    }
+}
+
+impl std::error::Error for BinaryPatchParseError {}
+
+impl From<BinaryPatchParseErrorKind> for BinaryPatchParseError {
+    fn from(kind: BinaryPatchParseErrorKind) -> Self {
+        Self { kind, span: None }
+    }
+}
+
+/// The kind of error that occurred when parsing a binary patch.
+#[derive(Debug, Clone, PartialEq, Eq)]
+#[non_exhaustive]
+pub(crate) enum BinaryPatchParseErrorKind {
+    /// Missing or invalid "GIT binary patch" header.
+    InvalidHeader,
+
+    /// First binary block (forward) not found.
+    MissingForwardBlock,
+
+    /// Second binary block (reverse) not found.
+    MissingReverseBlock,
+
+    /// No binary data available (marker-only patch).
+    NoBinaryData,
+
+    /// Invalid line length indicator in Base85 data.
+    InvalidLineLengthIndicator,
+}
+
+impl fmt::Display for BinaryPatchParseErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::InvalidHeader => write!(f, "invalid binary patch header"),
+            Self::MissingForwardBlock => write!(f, "first binary block not found"),
+            Self::MissingReverseBlock => write!(f, "second binary block not found"),
+            Self::NoBinaryData => write!(f, "no binary data available"),
+            Self::InvalidLineLengthIndicator => write!(f, "invalid line length indicator"),
+        }
+    }
+}
+
+/// Simple streaming parser for binary patches using lines() iterator.
+struct BinaryParser<'a> {
+    input: &'a str,
+    offset: usize,
+}
+
+impl<'a> BinaryParser<'a> {
+    fn new(input: &'a str) -> Self {
+        Self { input, offset: 0 }
+    }
+
+    fn peek_line(&self) -> Option<&'a str> {
+        self.input[self.offset..].lines().next()
+    }
+
+    /// Creates an error with the current offset as span.
+    fn error(&self, kind: BinaryPatchParseErrorKind) -> BinaryPatchParseError {
+        BinaryPatchParseError::new(kind, self.offset..self.offset)
+    }
+
+    fn next_line(&mut self) -> Option<&'a str> {
+        let rest = &self.input[self.offset..];
+        let line = rest.lines().next()?;
+        self.offset += line.len();
+
+        // Skip line ending (CRLF or LF)
+        let remaining = &self.input[self.offset..];
+        if remaining.starts_with("\r\n") {
+            self.offset += 2;
+        } else if remaining.starts_with('\n') {
+            self.offset += 1;
+        }
+
+        Some(line)
+    }
+
+    fn slice_from(&self, start_offset: usize) -> &'a str {
+        let end = self.offset;
+        // Strip trailing line ending before blank line
+        let slice = &self.input[start_offset..end];
+        slice
+            .strip_suffix("\r\n")
+            .or_else(|| slice.strip_suffix('\n'))
+            .unwrap_or(slice)
+    }
+}
+
+/// Parses binary patch content after git extended headers.
+///
+/// Expects input starting with "GIT binary patch" line.
+/// Returns the parsed patch and the number of bytes consumed.
+///
+/// Format:
+///
+/// ```text
+/// GIT binary patch
+/// <literal|delta> <modified_size>
+/// <base85_lines>
+///
+/// <literal|delta> <original_size>
+/// <base85_lines>
+/// ```
+pub(crate) fn parse_binary_patch(
+    input: &str,
+) -> Result<(BinaryPatch<'_>, usize), BinaryPatchParseError> {
+    let mut parser = BinaryParser::new(input);
+
+    // Expect "GIT binary patch" marker
+    if parser.next_line() != Some("GIT binary patch") {
+        return Err(parser.error(BinaryPatchParseErrorKind::InvalidHeader));
+    }
+
+    // Parse first block (forward: original -> modified)
+    let Some(forward) = parse_binary_block(&mut parser) else {
+        return Err(parser.error(BinaryPatchParseErrorKind::MissingForwardBlock));
+    };
+
+    // Parse second block (reverse: modified -> original)
+    let Some(reverse) = parse_binary_block(&mut parser) else {
+        return Err(parser.error(BinaryPatchParseErrorKind::MissingReverseBlock));
+    };
+
+    Ok((BinaryPatch::Full { forward, reverse }, parser.offset))
+}
+
+/// Parses a single binary block.
+///
+/// Returns a `BinaryBlock` with kind (literal/delta) and data.
+fn parse_binary_block<'a>(parser: &mut BinaryParser<'a>) -> Option<BinaryBlock<'a>> {
+    // Parse "literal 10" or "delta 18"
+    let format_line = parser.next_line()?;
+    let (patch_type, size_str) = format_line.split_once(' ')?;
+    let size: u64 = size_str.parse().ok()?;
+
+    let kind = match patch_type {
+        "literal" => BinaryBlockKind::Literal,
+        "delta" => BinaryBlockKind::Delta,
+        _ => return None,
+    };
+
+    // Record start of Base85 data
+    let data_start = parser.offset;
+
+    // Consume Base85 lines until blank line
+    while let Some(line) = parser.peek_line() {
+        if line.is_empty() {
+            parser.next_line(); // Consume blank line
+            break;
+        }
+        parser.next_line();
+    }
+
+    let data = parser.slice_from(data_start);
+
+    Some(BinaryBlock {
+        kind,
+        data: BinaryData { size, data },
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_literal_format_simple() {
+        let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\nliteral 0\nKcmV+b0RR6000031\n\n";
+        let (patch, consumed) = parse_binary_patch(input).unwrap();
+
+        assert_eq!(consumed, input.len());
+        match &patch {
+            BinaryPatch::Full { forward, reverse } => {
+                assert_eq!(forward.kind, BinaryBlockKind::Literal);
+                assert_eq!(forward.data.size, 10);
+                assert_eq!(reverse.kind, BinaryBlockKind::Literal);
+                assert_eq!(reverse.data.size, 0);
+            }
+            _ => panic!("expected Full variant"),
+        }
+    }
+
+    #[test]
+    fn parse_delta_format() {
+        let input = "GIT binary patch\ndelta 18\nccmV+t0PX*P2!IH%^Z^9`00000v-trB0x!=5aR2}S\n\ndelta 18\nccmV+t0PX*P2!IH%^Z^BFm9#}av-trB0zxAOrvLx|\n\n";
+        let (patch, _) = parse_binary_patch(input).unwrap();
+
+        match &patch {
+            BinaryPatch::Full { forward, reverse } => {
+                assert_eq!(forward.kind, BinaryBlockKind::Delta);
+                assert_eq!(forward.data.size, 18);
+                assert_eq!(reverse.kind, BinaryBlockKind::Delta);
+                assert_eq!(reverse.data.size, 18);
+            }
+            _ => panic!("expected Full variant"),
+        }
+    }
+
+    #[test]
+    fn parse_invalid_header() {
+        // Without "GIT binary patch" marker, parse_binary_patch returns error
+        let input = "literal 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\n";
+        let err = parse_binary_patch(input).unwrap_err();
+        assert_eq!(err.kind, BinaryPatchParseErrorKind::InvalidHeader);
+    }
+
+    #[test]
+    fn parse_with_crlf_line_endings() {
+        let input = "GIT binary patch\r\nliteral 10\r\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\r\n\r\nliteral 0\r\nKcmV+b0RR6000031\r\n\r\n";
+        let (patch, consumed) = parse_binary_patch(input).unwrap();
+
+        assert_eq!(consumed, input.len());
+        match &patch {
+            BinaryPatch::Full { forward, reverse } => {
+                assert_eq!(forward.kind, BinaryBlockKind::Literal);
+                assert_eq!(forward.data.size, 10);
+                assert_eq!(reverse.kind, BinaryBlockKind::Literal);
+                assert_eq!(reverse.data.size, 0);
+            }
+            _ => panic!("expected Full variant"),
+        }
+    }
+
+    #[test]
+    fn parse_mixed_format() {
+        // Git can use different encoding for each direction
+        let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\ndelta 18\nccmV+t0PX*P2!IH%^Z^9`00000v-trB0x!=5aR2}S\n\n";
+        let (patch, _) = parse_binary_patch(input).unwrap();
+
+        match &patch {
+            BinaryPatch::Full { forward, reverse } => {
+                assert_eq!(forward.kind, BinaryBlockKind::Literal);
+                assert_eq!(reverse.kind, BinaryBlockKind::Delta);
+            }
+            _ => panic!("expected Full variant"),
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 33ebbb6..5e11a40 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -223,6 +223,7 @@
 //! [`create_patch_bytes`]: fn.create_patch_bytes.html
 
 mod apply;
+pub mod binary;
 mod diff;
 mod merge;
 mod patch;

From 1f7f50f49ca4a4210365a948346823466f54be47 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 18:46:16 -0400
Subject: [PATCH 12/26] feat(patch_set): wire binary patch into gitdiff parser

- Add `Binary::Keep` variant (now the default) to `ParseOptions`
- Add `PatchKind::Binary` variant for binary patches
- Parse `GIT binary patch` payload via `parse_binary_patch`
- Handle `Binary files ... differ` as `BinaryPatch::Marker`
- Add `extract_file_op_binary` for file ops without ---/+++ headers
---
 src/patch_set/error.rs |  11 ++++
 src/patch_set/mod.rs   |  35 +++++++++++--
 src/patch_set/parse.rs | 115 +++++++++++++++++++++++++++++++++++++++--
 src/patch_set/tests.rs |  24 ++++++++-
 tests/compat/common.rs |  12 ++++-
 tests/replay.rs        |   5 ++
 6 files changed, 192 insertions(+), 10 deletions(-)

diff --git a/src/patch_set/error.rs b/src/patch_set/error.rs
index 9cd6aa9..20d7d3e 100644
--- a/src/patch_set/error.rs
+++ b/src/patch_set/error.rs
@@ -3,6 +3,7 @@
 use std::fmt;
 use std::ops::Range;
 
+use crate::binary::BinaryPatchParseError;
 use crate::patch::ParsePatchError;
 
 /// An error returned when parsing patches fails.
@@ -79,6 +80,9 @@ pub(crate) enum PatchSetParseErrorKind {
 
     /// Binary diff not supported in current configuration.
     BinaryNotSupported { path: String },
+
+    /// Binary patch parsing failed.
+    BinaryParse(BinaryPatchParseError),
 }
 
 impl fmt::Display for PatchSetParseErrorKind {
@@ -95,6 +99,7 @@ impl fmt::Display for PatchSetParseErrorKind {
             Self::BinaryNotSupported { path } => {
                 write!(f, "binary diff not supported: {path}")
             }
+            Self::BinaryParse(e) => write!(f, "{e}"),
         }
     }
 }
@@ -104,3 +109,9 @@ impl From<ParsePatchError> for PatchSetParseError {
         PatchSetParseErrorKind::Patch(e).into()
     }
 }
+
+impl From<BinaryPatchParseError> for PatchSetParseError {
+    fn from(e: BinaryPatchParseError) -> Self {
+        PatchSetParseErrorKind::BinaryParse(e).into()
+    }
+}
diff --git a/src/patch_set/mod.rs b/src/patch_set/mod.rs
index f83442c..6141c54 100644
--- a/src/patch_set/mod.rs
+++ b/src/patch_set/mod.rs
@@ -10,6 +10,7 @@ mod tests;
 
 use std::borrow::Cow;
 
+use crate::binary::BinaryPatch;
 use crate::Patch;
 
 pub use error::PatchSetParseError;
@@ -31,7 +32,7 @@ pub use parse::PatchSet;
 /// Note that this is not a documented Git behavior,
 /// so the implementation here is subject to change if Git changes.
 ///
-/// By default, binary diffs are skipped.
+/// By default, binary diffs are kept in the output.
 ///
 /// ## Example
 ///
@@ -70,10 +71,12 @@ pub(crate) enum Format {
 #[derive(Debug, Clone, Copy, Default)]
 pub(crate) enum Binary {
     /// Skip binary diffs silently.
-    #[default]
     Skip,
     /// Return error if binary diff encountered.
     Fail,
+    /// Keep binary diffs in the output.
+    #[default]
+    Keep,
 }
 
 impl ParseOptions {
@@ -103,7 +106,7 @@ impl ParseOptions {
     /// * `diff --git` headers
     /// * Extended headers (`new file mode`, `deleted file mode`, etc.)
     /// * Rename/copy detection (`rename from`/`rename to`, `copy from`/`copy to`)
-    /// * Binary file detection (skipped by default)
+    /// * Binary file detection (kept by default)
     ///
     /// [git-diff-format]: https://git-scm.com/docs/diff-format
     pub fn gitdiff() -> Self {
@@ -158,6 +161,8 @@ impl std::str::FromStr for FileMode {
 pub enum PatchKind<'a, T: ToOwned + ?Sized> {
     /// Text patch with hunks.
     Text(Patch<'a, T>),
+    /// Binary patch (literal or delta encoded, or marker-only).
+    Binary(BinaryPatch<'a>),
 }
 
 impl<T: ?Sized, O> std::fmt::Debug for PatchKind<'_, T>
@@ -168,6 +173,7 @@ where
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             PatchKind::Text(patch) => f.debug_tuple("Text").field(patch).finish(),
+            PatchKind::Binary(patch) => f.debug_tuple("Binary").field(patch).finish(),
         }
     }
 }
@@ -177,6 +183,15 @@ impl<'a, T: ToOwned + ?Sized> PatchKind<'a, T> {
     pub fn as_text(&self) -> Option<&Patch<'a, T>> {
         match self {
             PatchKind::Text(patch) => Some(patch),
+            PatchKind::Binary(_) => None,
+        }
+    }
+
+    /// Returns the binary patch, or `None` if this is a text patch.
+    pub fn as_binary(&self) -> Option<&BinaryPatch<'a>> {
+        match self {
+            PatchKind::Binary(patch) => Some(patch),
+            PatchKind::Text(_) => None,
         }
     }
 }
@@ -224,6 +239,20 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> {
         }
     }
 
+    fn new_binary(
+        operation: FileOperation<'a>,
+        patch: BinaryPatch<'a>,
+        old_mode: Option<FileMode>,
+        new_mode: Option<FileMode>,
+    ) -> Self {
+        Self {
+            operation,
+            kind: PatchKind::Binary(patch),
+            old_mode,
+            new_mode,
+        }
+    }
+
     /// Returns the file operation for this patch.
     pub fn operation(&self) -> &FileOperation<'a> {
         &self.operation
diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index 0d8b994..b089479 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -4,6 +4,7 @@ use super::{
     error::PatchSetParseErrorKind, Binary, FileMode, FileOperation, FilePatch, Format,
     ParseOptions, PatchSetParseError,
 };
+use crate::binary::{parse_binary_patch, BinaryPatch};
 use crate::patch::parse::parse_one;
 use crate::utils::escaped_filename;
 use crate::Patch;
@@ -86,8 +87,8 @@ impl<'a> PatchSet<'a> {
         let (header, header_consumed) = GitHeader::parse(&self.input[self.offset..]);
         self.offset += header_consumed;
 
-        // Handle binary markers ("Binary files ... differ")
-        if header.is_binary_marker || header.is_binary_patch {
+        // Handle "Binary files ... differ" (no patch data)
+        if header.is_binary_marker {
             match self.opts.binary {
                 Binary::Skip => {
                     return self.next_gitdiff_patch();
@@ -99,6 +100,77 @@ impl<'a> PatchSet<'a> {
                         abs_patch_start..abs_patch_start,
                     )));
                 }
+                Binary::Keep => {
+                    // FIXME: error spans point at `diff --git` line, not the specific offending line
+                    let operation = match extract_file_op_binary(&header) {
+                        Ok(op) => op,
+                        Err(mut e) => {
+                            e.set_span(abs_patch_start..abs_patch_start);
+                            return Some(Err(e));
+                        }
+                    };
+                    let (old_mode, new_mode) = match parse_file_modes(&header) {
+                        Ok(modes) => modes,
+                        Err(mut e) => {
+                            e.set_span(abs_patch_start..abs_patch_start);
+                            return Some(Err(e));
+                        }
+                    };
+                    return Some(Ok(FilePatch::new_binary(
+                        operation,
+                        BinaryPatch::Marker,
+                        old_mode,
+                        new_mode,
+                    )));
+                }
+            }
+        }
+
+        // Handle "GIT binary patch" (has patch data)
+        if let Some(binary_patch_start) = header.binary_patch_offset {
+            match self.opts.binary {
+                Binary::Skip => {
+                    return self.next_gitdiff_patch();
+                }
+                Binary::Fail => {
+                    let path = header.diff_git_line.unwrap_or("<unknown>").to_owned();
+                    return Some(Err(PatchSetParseError::new(
+                        PatchSetParseErrorKind::BinaryNotSupported { path },
+                        abs_patch_start..abs_patch_start,
+                    )));
+                }
+                Binary::Keep => {
+                    // GitHeader::parse consumed the marker line but not the payload.
+                    // Use the recorded offset to pass input from the marker onward.
+                    let binary_input = &self.input[abs_patch_start + binary_patch_start..];
+                    let (binary_patch, consumed) = match parse_binary_patch(binary_input) {
+                        Ok(result) => result,
+                        Err(e) => return Some(Err(e.into())),
+                    };
+                    self.offset = abs_patch_start + binary_patch_start + consumed;
+
+                    // FIXME: error spans point at `diff --git` line, not the specific offending line
+                    let operation = match extract_file_op_binary(&header) {
+                        Ok(op) => op,
+                        Err(mut e) => {
+                            e.set_span(abs_patch_start..abs_patch_start);
+                            return Some(Err(e));
+                        }
+                    };
+                    let (old_mode, new_mode) = match parse_file_modes(&header) {
+                        Ok(modes) => modes,
+                        Err(mut e) => {
+                            e.set_span(abs_patch_start..abs_patch_start);
+                            return Some(Err(e));
+                        }
+                    };
+                    return Some(Ok(FilePatch::new_binary(
+                        operation,
+                        binary_patch,
+                        old_mode,
+                        new_mode,
+                    )));
+                }
             }
         }
 
@@ -301,7 +373,8 @@ struct GitHeader<'a> {
     /// Binary files /dev/null and b/image.png differ
     /// ```
     is_binary_marker: bool,
-    /// Whether this is a binary diff with actual patch content.
+    /// Byte offset of `"GIT binary patch"` line relative to header input,
+    /// or `None` if no binary patch content was found.
     ///
     /// Observed `git diff --binary` output:
     ///
@@ -316,7 +389,7 @@ struct GitHeader<'a> {
     /// literal 0
     /// KcmV+b0RR6000031
     /// ```
-    is_binary_patch: bool,
+    binary_patch_offset: Option<usize>,
 }
 
 impl<'a> GitHeader<'a> {
@@ -363,7 +436,7 @@ impl<'a> GitHeader<'a> {
             } else if line.starts_with("Binary files ") {
                 header.is_binary_marker = true;
             } else if line.starts_with("GIT binary patch") {
-                header.is_binary_patch = true;
+                header.binary_patch_offset = Some(consumed);
             } else {
                 // Unrecognized line: End of extended headers
                 // (typically `---`/`+++`/`@@` or trailing content).
@@ -416,6 +489,38 @@ fn extract_file_op_gitdiff<'a>(
     }
 }
 
+/// Extracts file operation for binary patches (no ---/+++ headers).
+fn extract_file_op_binary<'a>(
+    header: &GitHeader<'a>,
+) -> Result<FileOperation<'a>, PatchSetParseError> {
+    // Git headers are authoritative for rename/copy
+    if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) {
+        return Ok(FileOperation::Rename {
+            from: Cow::Borrowed(from),
+            to: Cow::Borrowed(to),
+        });
+    }
+    if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) {
+        return Ok(FileOperation::Copy {
+            from: Cow::Borrowed(from),
+            to: Cow::Borrowed(to),
+        });
+    }
+
+    // Use `diff --git <old> <new>` for binary patches.
+    let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else {
+        return Err(PatchSetParseErrorKind::InvalidDiffGitPath.into());
+    };
+
+    if header.new_file_mode.is_some() {
+        Ok(FileOperation::Create(modified))
+    } else if header.deleted_file_mode.is_some() {
+        Ok(FileOperation::Delete(original))
+    } else {
+        Ok(FileOperation::Modify { original, modified })
+    }
+}
+
 /// Parses file modes from git extended headers.
 fn parse_file_modes(
     header: &GitHeader<'_>,
diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs
index 9648930..0944c54 100644
--- a/src/patch_set/tests.rs
+++ b/src/patch_set/tests.rs
@@ -535,6 +535,7 @@ index 0000000..e69de29
         );
         match patches[0].patch() {
             PatchKind::Text(p) => assert!(p.hunks().is_empty()),
+            PatchKind::Binary(_) => panic!("expected text patch"),
         }
     }
 
@@ -566,6 +567,25 @@ diff --git a/foo b/foo
         ));
     }
 
+    #[test]
+    fn binary_marker_kept_by_default() {
+        // Default is Keep: binary marker is returned as BinaryPatch::Marker.
+        let input = "\
+diff --git a/img.png b/img.png
+Binary files a/img.png and b/img.png differ
+diff --git a/foo b/foo
+--- a/foo
++++ b/foo
+@@ -1 +1 @@
+-old
++new
+";
+        let patches = parse_gitdiff(input);
+        assert_eq!(patches.len(), 2);
+        assert!(patches[0].patch().as_binary().is_some());
+        assert!(patches[1].patch().as_text().is_some());
+    }
+
     #[test]
     fn binary_skip_does_not_stall() {
         // Binary marker must be skipped and offset must advance
@@ -580,7 +600,9 @@ diff --git a/foo b/foo
 -old
 +new
 ";
-        let patches = parse_gitdiff(input);
+        let patches: Vec<_> = PatchSet::parse(input, ParseOptions::gitdiff().skip_binary())
+            .collect::<Result<_, _>>()
+            .unwrap();
         assert_eq!(patches.len(), 1, "binary should be skipped, text parsed");
     }
 }
diff --git a/tests/compat/common.rs b/tests/compat/common.rs
index 117ed04..c55cdd1 100644
--- a/tests/compat/common.rs
+++ b/tests/compat/common.rs
@@ -8,7 +8,10 @@ use std::{
     sync::Once,
 };
 
-use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError};
+use diffy::{
+    binary::BinaryPatch,
+    patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError},
+};
 
 /// Which external tool to compare against.
 #[derive(Clone, Copy)]
@@ -368,6 +371,13 @@ pub fn apply_diffy(
                 }
                 fs::write(&result_path, result.as_bytes()).unwrap();
             }
+            PatchKind::Binary(BinaryPatch::Marker) => {
+                // Dont do anything if it is just a binary patch marker.[
+            }
+            PatchKind::Binary(_) => {
+                // Binary patch application requires the `binary` feature.
+                // Will be wired up when that feature is added.
+            }
         }
     }
 
diff --git a/tests/replay.rs b/tests/replay.rs
index 1d4e731..4996151 100644
--- a/tests/replay.rs
+++ b/tests/replay.rs
@@ -512,6 +512,11 @@ fn process_commit(
                     );
                 }
             }
+            PatchKind::Binary(_) => {
+                // Binary patch application not yet wired up in replay tests.
+                // Will be done once the `binary` Cargo feature is added.
+                skipped += 1;
+            }
         }
 
         applied += 1;

From 4b299387ec0ac56ba9d3e38a1fb28759641ded69 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Mon, 2 Feb 2026 07:22:59 +0800
Subject: [PATCH 13/26] refactor: clippy manual_div_ceil

The API was stabilized in 1.73.
The lint was added in 1.93.

This is required for a MSRV bump to 1.75
---
 src/diff/myers.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diff/myers.rs b/src/diff/myers.rs
index 73eb410..502d077 100644
--- a/src/diff/myers.rs
+++ b/src/diff/myers.rs
@@ -69,7 +69,7 @@ impl ::std::fmt::Display for Snake {
 
 fn max_d(len1: usize, len2: usize) -> usize {
     // XXX look into reducing the need to have the additional '+ 1'
-    (len1 + len2 + 1) / 2 + 1
+    (len1 + len2).div_ceil(2) + 1
 }
 
 // The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes some of which may

From 70eb62f6d8b890b473761b14db24a0cdc003332b Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 21:15:04 -0400
Subject: [PATCH 14/26] chore: dependency flate2 behind binary feature

This is a preparation for binary diff application support.

* Git binary patch is compressed by zlib hence flate2
* zlib-rs (which is the most performant zlib backend)
  requires MSRF 1.75.0+ hence the bump.
---
 .github/workflows/ci.yml |  2 +-
 Cargo.lock               | 39 +++++++++++++++++++++++++++++++++++++++
 Cargo.toml               |  4 +++-
 deny.toml                |  1 +
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d1c8292..b1eb5a6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,7 +23,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        rust: [stable, beta, nightly, 1.70.0]
+        rust: [stable, beta, nightly, 1.75.0]
 
     steps:
       - uses: actions/checkout@v6
diff --git a/Cargo.lock b/Cargo.lock
index ad290e3..c726b1c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,12 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
 [[package]]
 name = "anstream"
 version = "0.6.21"
@@ -109,6 +115,7 @@ name = "diffy"
 version = "0.4.2"
 dependencies = [
  "anstyle",
+ "flate2",
  "rayon",
  "snapbox",
 ]
@@ -152,6 +159,16 @@ dependencies = [
  "libredox",
 ]
 
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "miniz_oxide",
+ "zlib-rs",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.3.4"
@@ -200,6 +217,16 @@ version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
 
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
 [[package]]
 name = "normalize-line-endings"
 version = "0.3.0"
@@ -281,6 +308,12 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "simd-adler32"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
+
 [[package]]
 name = "similar"
 version = "2.7.0"
@@ -446,3 +479,9 @@ name = "wit-bindgen"
 version = "0.51.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+
+[[package]]
+name = "zlib-rs"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513"
diff --git a/Cargo.toml b/Cargo.toml
index 25c6e51..68ce3ff 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,14 +9,16 @@ repository = "https://github.com/bmwill/diffy"
 readme = "README.md"
 keywords = ["diff", "patch", "merge"]
 categories = ["text-processing"]
-rust-version = "1.70.0"
+rust-version = "1.75.0"
 edition = "2021"
 
 [features]
+binary = ["dep:flate2"]
 color = ["dep:anstyle"]
 
 [dependencies]
 anstyle = { version = "1.0.13", optional = true }
+flate2 = { version = "1.1.9", optional = true, default-features = false, features = ["zlib-rs"] }
 
 [dev-dependencies]
 rayon = "1.10.0"
diff --git a/deny.toml b/deny.toml
index d495b7c..740f35a 100644
--- a/deny.toml
+++ b/deny.toml
@@ -91,6 +91,7 @@ ignore = [
 allow = [
     "MIT",
     "Apache-2.0",
+    "Zlib",
     #"Apache-2.0 WITH LLVM-exception",
 ]
 # The confidence threshold for detecting a license from license text.

From 3540c162477491f1661fd53a854451222fd3c3f3 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 21:25:54 -0400
Subject: [PATCH 15/26] feat(binary): base85/delta decode and patch application

* Add base85 encoder/decoder and Git delta format decoder.
* Wire them into `BinaryPatch::apply() and `apply_reverse()`
  for decoding zlib-compressed, base85-encoded binary payload.

These are feature-gated behind the `binary` feature.
---
 src/binary/base85.rs | 229 +++++++++++++++++++++++++++
 src/binary/delta.rs  | 362 +++++++++++++++++++++++++++++++++++++++++++
 src/binary/mod.rs    | 210 +++++++++++++++++++++++++
 3 files changed, 801 insertions(+)
 create mode 100644 src/binary/base85.rs
 create mode 100644 src/binary/delta.rs

diff --git a/src/binary/base85.rs b/src/binary/base85.rs
new file mode 100644
index 0000000..182efe8
--- /dev/null
+++ b/src/binary/base85.rs
@@ -0,0 +1,229 @@
+//! Base85 encoding and decoding using the character set defined in [RFC 1924].
+//!
+//! ## References
+//!
+//! * [RFC 1924]
+//! * [Wikipedia: Ascii85 § RFC 1924 version](https://en.wikipedia.org/wiki/Ascii85#RFC_1924_version)
+//!
+//! [RFC 1924]: https://datatracker.ietf.org/doc/html/rfc1924
+
+use std::fmt;
+
+/// Base85 character set (RFC 1924).
+const ALPHABET: &[u8; 85] = b"0123456789\
+    ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+    !#$%&()*+-;<=>?@^_`{|}~";
+
+/// Pre-computed lookup table for Base85 decoding.
+///
+/// Maps ASCII byte value → digit value or `0xFF` for invalid characters.
+/// This provides O(1) lookup.
+const TABLE: [u8; 256] = {
+    let mut table = [0xFFu8; 256];
+    let mut i = 0usize;
+    while i < 85 {
+        table[ALPHABET[i] as usize] = i as u8;
+        i += 1;
+    }
+    table
+};
+
+/// Error type for Base85 operations.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Base85Error {
+    /// Invalid character that is not in RFC 1924 alphabet.
+    InvalidCharacter(char),
+    /// Invalid input length for the operation.
+    InvalidLength,
+}
+
+impl fmt::Display for Base85Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Base85Error::InvalidCharacter(c) => write!(f, "invalid base85 character: {:?}", c),
+            Base85Error::InvalidLength => write!(f, "invalid input length"),
+        }
+    }
+}
+
+impl std::error::Error for Base85Error {}
+
+/// Decodes a Base85 string to the provided output.
+///
+/// ## Limitations
+///
+/// The input length must be a multiple of 5.
+///
+/// This function does not handle padding for partial chunks.
+/// When decoding data where the original byte count isn't a multiple of 4,
+/// callers must handle truncation at a higher level.
+/// For example, via a length indicator in Git binary patch.
+pub fn decode_into(input: &str, output: &mut impl Extend<u8>) -> Result<(), Base85Error> {
+    let bytes = input.as_bytes();
+
+    if bytes.len() % 5 != 0 {
+        return Err(Base85Error::InvalidLength);
+    }
+
+    // TODO: Use `as_chunks::<5>()` when MSRV >= 1.88
+    for chunk in bytes.chunks_exact(5) {
+        let mut value: u32 = 0;
+        for &byte in chunk {
+            let digit = TABLE[byte as usize];
+            if digit == 0xFF {
+                return Err(Base85Error::InvalidCharacter(byte as char));
+            }
+            value = value * 85 + digit as u32;
+        }
+
+        output.extend(value.to_be_bytes());
+    }
+
+    Ok(())
+}
+
+/// Encodes bytes in Base85 to the provided output.
+///
+/// ## Limitations
+///
+/// The input length must be a multiple of 4.
+///
+/// This function does not handle padding for partial chunks.
+/// Callers encoding data where the byte count isn't a multiple of 4
+/// must handle padding at a higher level.
+/// For example, via a length indicator in Git binary patch format.
+#[allow(dead_code)] // will be used for patch formatting
+pub fn encode_into(input: &[u8], output: &mut impl Extend<char>) -> Result<(), Base85Error> {
+    if input.len() % 4 != 0 {
+        return Err(Base85Error::InvalidLength);
+    }
+
+    // TODO: Use `as_chunks::<4>()` when MSRV >= 1.88
+    for chunk in input.chunks_exact(4) {
+        let mut value = u32::from_be_bytes(chunk.try_into().unwrap());
+
+        // Extract 5 base85 digits (least to most significant order)
+        let mut digits = [0u8; 5];
+        for digit in digits.iter_mut().rev() {
+            *digit = ALPHABET[(value % 85) as usize];
+            value /= 85;
+        }
+        output.extend(digits.iter().map(|&b| b as char));
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn decode(input: &str) -> Result<Vec<u8>, Base85Error> {
+        let mut result = Vec::with_capacity((input.len() / 5) * 4);
+        decode_into(input, &mut result)?;
+        Ok(result)
+    }
+
+    fn encode(input: &[u8]) -> Result<String, Base85Error> {
+        let mut result = String::with_capacity((input.len() / 4) * 5);
+        encode_into(input, &mut result)?;
+        Ok(result)
+    }
+
+    const TEST_VECTORS: &[(&[u8], &str)] = &[
+        (b"", ""),
+        (&[0x00, 0x00, 0x00, 0x00], "00000"),
+        (&[0xff, 0xff, 0xff, 0xff], "|NsC0"),
+        // Rust ecosystem phrases
+        (b"Rust", "Qgw55"),
+        (b"Fearless concurrency", "MrC1gY-MwEAY*TCV|8+JWo~16"),
+        (b"memory safe!", "ZDnn5a(N(gVP<6^"),
+        (b"blazing fast", "Vr*f0X>MmAW?^%5"),
+        (
+            b"zero-cost abstraction!??",
+            "dS!BNEn{zUbRc13b98cHV{~b6ZXrKE",
+        ),
+    ];
+
+    #[test]
+    fn table_covers_all_alphabet_chars() {
+        for (i, &c) in ALPHABET.iter().enumerate() {
+            assert_eq!(
+                TABLE[c as usize], i as u8,
+                "mismatch for char '{}' at index {}",
+                c as char, i
+            );
+        }
+    }
+
+    #[test]
+    fn table_rejects_invalid_chars() {
+        let invalid_chars = b" \t\n\r\"'\\[],:";
+        for &c in invalid_chars {
+            assert_eq!(
+                TABLE[c as usize], 0xFF,
+                "char '{}' should be invalid",
+                c as char
+            );
+        }
+    }
+
+    #[test]
+    fn decode_test_vectors() {
+        for (bytes, encoded) in TEST_VECTORS {
+            let result = decode(encoded).unwrap();
+            assert_eq!(&result, *bytes, "decode({:?}) failed", encoded);
+        }
+    }
+
+    #[test]
+    fn encode_test_vectors() {
+        for (bytes, encoded) in TEST_VECTORS {
+            let result = encode(bytes).unwrap();
+            assert_eq!(result, *encoded, "encode({:?}) failed", bytes);
+        }
+    }
+
+    #[test]
+    fn decode_invalid_length() {
+        assert!(matches!(decode("0000"), Err(Base85Error::InvalidLength)));
+        assert!(matches!(decode("000"), Err(Base85Error::InvalidLength)));
+        assert!(matches!(decode("00"), Err(Base85Error::InvalidLength)));
+        assert!(matches!(decode("0"), Err(Base85Error::InvalidLength)));
+    }
+
+    #[test]
+    fn decode_invalid_character() {
+        assert!(matches!(
+            decode("0000 "),
+            Err(Base85Error::InvalidCharacter(' '))
+        ));
+        assert!(matches!(
+            decode("0000\""),
+            Err(Base85Error::InvalidCharacter('"'))
+        ));
+    }
+
+    #[test]
+    fn encode_invalid_length() {
+        assert!(matches!(encode(&[0]), Err(Base85Error::InvalidLength)));
+        assert!(matches!(encode(&[0, 0]), Err(Base85Error::InvalidLength)));
+        assert!(matches!(
+            encode(&[0, 0, 0]),
+            Err(Base85Error::InvalidLength)
+        ));
+        assert!(matches!(
+            encode(&[0, 0, 0, 0, 0]),
+            Err(Base85Error::InvalidLength)
+        ));
+    }
+
+    #[test]
+    fn round_trip() {
+        for (bytes, _) in TEST_VECTORS {
+            let encoded = encode(bytes).unwrap();
+            let decoded = decode(&encoded).unwrap();
+            assert_eq!(&decoded, *bytes, "round-trip failed for {:?}", bytes);
+        }
+    }
+}
diff --git a/src/binary/delta.rs b/src/binary/delta.rs
new file mode 100644
index 0000000..fa1de26
--- /dev/null
+++ b/src/binary/delta.rs
@@ -0,0 +1,362 @@
+//! Git delta binary diff support.
+//!
+//! A delta payload contains:
+//!
+//! 1. Header: variable-length encoded sizes (original_size, modified_size)
+//! 2. Instructions: sequence of `ADD` and `COPY` operations
+//!
+//! Based on Diffx's [Git Delta Binary Diffs](https://diffx.org/spec/binary-diffs.html#git-delta-binary-diffs)
+
+use std::fmt;
+
+/// Applies delta instructions to an original file, producing the modified file.
+pub fn apply(original: &[u8], delta: &[u8]) -> Result<Vec<u8>, DeltaError> {
+    let mut cursor = DeltaCursor::new(delta);
+
+    let header_orig_size = cursor.read_size()?;
+    let header_mod_size = cursor.read_size()?;
+
+    // Validate original size
+    if original.len() as u64 != header_orig_size {
+        return Err(DeltaError::OriginalSizeMismatch {
+            expected: header_orig_size,
+            actual: original.len() as u64,
+        });
+    }
+
+    let mut result = Vec::with_capacity(header_mod_size as usize);
+
+    // Process instructions until we've consumed all delta data
+    while !cursor.is_empty() {
+        let control = cursor.read_byte()?;
+
+        if control & 0x80 != 0 {
+            // COPY instruction
+            let (src_offset, copy_len) = cursor.read_copy_params(control)?;
+            let src_end = src_offset
+                .checked_add(copy_len)
+                .ok_or(DeltaError::InvalidCopyRange)?;
+
+            if src_end > original.len() {
+                return Err(DeltaError::CopyOutOfBounds {
+                    offset: src_offset,
+                    length: copy_len,
+                    original_size: original.len(),
+                });
+            }
+
+            result.extend_from_slice(&original[src_offset..src_end]);
+        } else {
+            // ADD instruction
+            let add_len = control as usize;
+            let data = cursor.read_bytes(add_len)?;
+            result.extend_from_slice(data);
+        }
+    }
+
+    // Validate result size
+    if result.len() as u64 != header_mod_size {
+        return Err(DeltaError::ModifiedSizeMismatch {
+            expected: header_mod_size,
+            actual: result.len() as u64,
+        });
+    }
+
+    Ok(result)
+}
+
+/// Cursor for reading delta instructions.
+struct DeltaCursor<'a> {
+    data: &'a [u8],
+    offset: usize,
+}
+
+impl<'a> DeltaCursor<'a> {
+    fn new(data: &'a [u8]) -> Self {
+        Self { data, offset: 0 }
+    }
+
+    fn is_empty(&self) -> bool {
+        self.offset >= self.data.len()
+    }
+
+    fn read_byte(&mut self) -> Result<u8, DeltaError> {
+        if self.offset >= self.data.len() {
+            return Err(DeltaError::UnexpectedEof);
+        }
+        let byte = self.data[self.offset];
+        self.offset += 1;
+        Ok(byte)
+    }
+
+    fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], DeltaError> {
+        let end = self
+            .offset
+            .checked_add(len)
+            .ok_or(DeltaError::UnexpectedEof)?;
+        if end > self.data.len() {
+            return Err(DeltaError::UnexpectedEof);
+        }
+        let bytes = &self.data[self.offset..end];
+        self.offset = end;
+        Ok(bytes)
+    }
+
+    /// Reads a variable-length encoded size from the header.
+    ///
+    /// Format: each byte uses 7 bits for value, MSB indicates continuation.
+    /// Bytes are in little-endian order (LSB first).
+    fn read_size(&mut self) -> Result<u64, DeltaError> {
+        let mut file_len: u64 = 0;
+        let mut shift: u32 = 0;
+
+        loop {
+            let byte = self.read_byte()?;
+
+            // Add 7 bits of value at current shift position
+            let value = (byte & 0x7F) as u64;
+            file_len |= value.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?;
+
+            // MSB clear means this is the last byte
+            if byte & 0x80 == 0 {
+                break;
+            }
+
+            shift += 7;
+        }
+
+        Ok(file_len)
+    }
+
+    /// Reads `COPY` instruction parameters from the control byte.
+    /// Returns `(src_offset, copy_len)`.
+    ///
+    /// Control byte format is `1oooosss`:
+    ///
+    /// * Bits 0-3: src_offset bytes
+    /// * Bits 4-6: copy_len bytes
+    fn read_copy_params(&mut self, control: u8) -> Result<(usize, usize), DeltaError> {
+        let mut src_offset: u32 = 0;
+        for (mask, shift) in [(0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24)] {
+            if control & mask != 0 {
+                let byte = self.read_byte()? as u32;
+                src_offset |= byte.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?;
+            }
+        }
+
+        let mut copy_len: u32 = 0;
+        for (mask, shift) in [(0x10, 0), (0x20, 8), (0x40, 16)] {
+            if control & mask != 0 {
+                let byte = self.read_byte()? as u32;
+                copy_len |= byte.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?;
+            }
+        }
+
+        if copy_len == 0 {
+            // Size of 0 means 65536
+            copy_len = 0x10000;
+        }
+
+        Ok((src_offset as usize, copy_len as usize))
+    }
+}
+
+/// Error type for delta operations.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum DeltaError {
+    /// Unexpected end of delta data.
+    UnexpectedEof,
+    /// Size value overflowed during decoding.
+    SizeOverflow,
+    /// Original file size doesn't match header.
+    OriginalSizeMismatch { expected: u64, actual: u64 },
+    /// Modified file size doesn't match header.
+    ModifiedSizeMismatch { expected: u64, actual: u64 },
+    /// COPY instruction references out-of-bounds data.
+    CopyOutOfBounds {
+        offset: usize,
+        length: usize,
+        original_size: usize,
+    },
+    /// COPY range calculation overflowed.
+    InvalidCopyRange,
+}
+
+impl fmt::Display for DeltaError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            DeltaError::UnexpectedEof => write!(f, "unexpected end of delta data"),
+            DeltaError::SizeOverflow => write!(f, "size value overflow"),
+            DeltaError::OriginalSizeMismatch { expected, actual } => {
+                write!(
+                    f,
+                    "original size mismatch: expected {expected}, got {actual}"
+                )
+            }
+            DeltaError::ModifiedSizeMismatch { expected, actual } => {
+                write!(
+                    f,
+                    "modified size mismatch: expected {expected}, got {actual}"
+                )
+            }
+            DeltaError::CopyOutOfBounds {
+                offset,
+                length,
+                original_size,
+            } => {
+                write!(
+                    f,
+                    "copy out of bounds: offset={offset}, length={length}, original_size={original_size}"
+                )
+            }
+            DeltaError::InvalidCopyRange => write!(f, "copy range calculation overflow"),
+        }
+    }
+}
+
+impl std::error::Error for DeltaError {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn read_size_single_byte() {
+        // 0x0A = 10, MSB clear = end
+        let data = [0x0A];
+        let mut cursor = DeltaCursor::new(&data);
+        assert_eq!(cursor.read_size().unwrap(), 10);
+    }
+
+    #[test]
+    fn read_size_multi_byte() {
+        // 0x80 | 0x01 = 1, continue; 0x02 = 2 << 7 = 256; total = 257
+        let data = [0x81, 0x02];
+        let mut cursor = DeltaCursor::new(&data);
+        assert_eq!(cursor.read_size().unwrap(), 1 + (2 << 7));
+    }
+
+    #[test]
+    fn apply_add_only() {
+        // Header: orig_size=0, mod_size=5
+        // ADD 5 bytes: "hello"
+        let delta = [
+            0x00, // orig_size = 0
+            0x05, // mod_size = 5
+            0x05, // ADD 5 bytes
+            b'h', b'e', b'l', b'l', b'o',
+        ];
+        let result = apply(&[], &delta).unwrap();
+        assert_eq!(result, b"hello");
+    }
+
+    #[test]
+    fn apply_copy_only() {
+        // Header: orig_size=5, mod_size=5
+        // COPY offset=0, len=5
+        let delta = [
+            0x05, // orig_size = 5
+            0x05, // mod_size = 5
+            0x90, // COPY: control=0x90 (0x80 | 0x10), offset=0, size byte present
+            0x05, // size = 5
+        ];
+        let original = b"hello";
+        let result = apply(original, &delta).unwrap();
+        assert_eq!(result, b"hello");
+    }
+
+    #[test]
+    fn apply_copy_with_offset() {
+        // Header: orig_size=10, mod_size=5
+        // COPY offset=5, len=5
+        let delta = [
+            0x0A, // orig_size = 10
+            0x05, // mod_size = 5
+            0x91, // COPY: 0x80 | 0x10 | 0x01 (offset1 + size1 present)
+            0x05, // offset = 5
+            0x05, // size = 5
+        ];
+        let original = b"helloworld";
+        let result = apply(original, &delta).unwrap();
+        assert_eq!(result, b"world");
+    }
+
+    #[test]
+    fn apply_mixed_instructions() {
+        // Create "HELLO world" from "hello world"
+        // Header: orig_size=11, mod_size=11
+        // ADD 5: "HELLO"
+        // COPY offset=5, len=6: " world"
+        let delta = [
+            0x0B, // orig_size = 11
+            0x0B, // mod_size = 11
+            0x05, // ADD 5 bytes
+            b'H', b'E', b'L', b'L', b'O', // "HELLO"
+            0x91, // COPY: offset1 + size1 present
+            0x05, // offset = 5
+            0x06, // size = 6
+        ];
+        let original = b"hello world";
+        let result = apply(original, &delta).unwrap();
+        assert_eq!(result, b"HELLO world");
+    }
+
+    #[test]
+    fn apply_copy_size_zero_means_65536() {
+        // When size bytes result in 0, it means 65536
+        // Header: orig_size=65536, mod_size=65536
+        // COPY offset=0, len=65536 (encoded as 0)
+        let original = vec![0xAB; 65536];
+        let delta = [
+            0x80, 0x80, 0x04, // orig_size = 65536 (varint)
+            0x80, 0x80, 0x04, // mod_size = 65536 (varint)
+            0x80, // COPY: no offset bytes, no size bytes = offset 0, size 65536
+        ];
+        let result = apply(&original, &delta).unwrap();
+        assert_eq!(result.len(), 65536);
+        assert_eq!(result, original);
+    }
+
+    #[test]
+    fn error_original_size_mismatch() {
+        let delta = [
+            0x0A, // orig_size = 10
+            0x05, // mod_size = 5
+        ];
+        let original = b"short"; // only 5 bytes
+        let err = apply(original, &delta).unwrap_err();
+        assert!(matches!(
+            err,
+            DeltaError::OriginalSizeMismatch {
+                expected: 10,
+                actual: 5
+            }
+        ));
+    }
+
+    #[test]
+    fn error_copy_out_of_bounds() {
+        let delta = [
+            0x05, // orig_size = 5
+            0x05, // mod_size = 5
+            0x91, // COPY
+            0x0A, // offset = 10 (out of bounds!)
+            0x05, // size = 5
+        ];
+        let original = b"hello";
+        let err = apply(original, &delta).unwrap_err();
+        assert!(matches!(err, DeltaError::CopyOutOfBounds { .. }));
+    }
+
+    #[test]
+    fn error_unexpected_eof_in_add() {
+        let delta = [
+            0x00, // orig_size = 0
+            0x05, // mod_size = 5
+            0x05, // ADD 5 bytes
+            b'h', b'i', // only 2 bytes provided
+        ];
+        let err = apply(&[], &delta).unwrap_err();
+        assert_eq!(err, DeltaError::UnexpectedEof);
+    }
+}
diff --git a/src/binary/mod.rs b/src/binary/mod.rs
index 696f0d6..c1c7ab4 100644
--- a/src/binary/mod.rs
+++ b/src/binary/mod.rs
@@ -5,6 +5,11 @@
 //!
 //! Based on [DiffX Binary Diffs specification](https://diffx.org/spec/binary-diffs.html).
 
+#[cfg(feature = "binary")]
+mod base85;
+#[cfg(feature = "binary")]
+mod delta;
+
 use std::{fmt, ops::Range};
 
 /// The type of a binary patch block.
@@ -54,9 +59,69 @@ pub enum BinaryPatch<'a> {
     ///
     /// This represents the `Binary files a/path and b/path differ` case,
     /// where git detected a binary change but didn't include the actual data.
+    ///
+    /// Calling [`apply()`](Self::apply) on this variant returns an error.
     Marker,
 }
 
+impl<'a> BinaryPatch<'a> {
+    /// Applies a binary patch forward: original -> modified.
+    ///
+    /// - If the forward block is `Literal`: returns the decoded content directly.
+    /// - If the forward block is `Delta`: applies delta instructions to `original`.
+    ///
+    /// Unlike `git apply`, this doesn't validate the original content hash.
+    #[cfg(feature = "binary")]
+    pub fn apply(&self, original: &[u8]) -> Result<Vec<u8>, BinaryPatchParseError> {
+        match self {
+            BinaryPatch::Full { forward, .. } => Self::apply_block(forward, original),
+            BinaryPatch::Marker => Err(BinaryPatchParseErrorKind::NoBinaryData.into()),
+        }
+    }
+
+    /// Applies a binary patch in reverse: modified -> original.
+    ///
+    /// - If the reverse block is `Literal`: returns the decoded content directly.
+    /// - If the reverse block is `Delta`: applies delta instructions to `modified`.
+    ///
+    /// Unlike `git apply`, this doesn't validate the modified content hash.
+    #[cfg(feature = "binary")]
+    pub fn apply_reverse(&self, modified: &[u8]) -> Result<Vec<u8>, BinaryPatchParseError> {
+        match self {
+            BinaryPatch::Full { reverse, .. } => Self::apply_block(reverse, modified),
+            BinaryPatch::Marker => Err(BinaryPatchParseErrorKind::NoBinaryData.into()),
+        }
+    }
+
+    /// Applies a single block (either literal or delta).
+    #[cfg(feature = "binary")]
+    fn apply_block(block: &BinaryBlock<'_>, base: &[u8]) -> Result<Vec<u8>, BinaryPatchParseError> {
+        match block.kind {
+            BinaryBlockKind::Literal => Self::decode_data(&block.data),
+            BinaryBlockKind::Delta => {
+                let delta_instructions = Self::decode_data(&block.data)?;
+                delta::apply(base, &delta_instructions).map_err(BinaryPatchParseError::from)
+            }
+        }
+    }
+
+    /// See [Decoding Logic](https://diffx.org/spec/binary-diffs.html#decoding-logic)
+    #[cfg(feature = "binary")]
+    fn decode_data(binary_data: &BinaryData<'_>) -> Result<Vec<u8>, BinaryPatchParseError> {
+        use std::io::Read;
+
+        let compressed = decode_base85_lines(binary_data.data)?;
+
+        let mut decoder = flate2::read::ZlibDecoder::new(&compressed[..]);
+        let mut decompressed = Vec::new();
+        decoder
+            .read_to_end(&mut decompressed)
+            .map_err(|e| BinaryPatchParseErrorKind::DecompressionFailed(e.to_string()))?;
+
+        Ok(decompressed)
+    }
+}
+
 /// Represents a single binary payload in a Git binary diff.
 ///
 /// For example, the following patch block
@@ -115,6 +180,20 @@ impl fmt::Display for BinaryPatchParseError {
 
 impl std::error::Error for BinaryPatchParseError {}
 
+#[cfg(feature = "binary")]
+impl From<base85::Base85Error> for BinaryPatchParseError {
+    fn from(e: base85::Base85Error) -> Self {
+        BinaryPatchParseErrorKind::Base85(e).into()
+    }
+}
+
+#[cfg(feature = "binary")]
+impl From<delta::DeltaError> for BinaryPatchParseError {
+    fn from(e: delta::DeltaError) -> Self {
+        BinaryPatchParseErrorKind::Delta(e).into()
+    }
+}
+
 impl From<BinaryPatchParseErrorKind> for BinaryPatchParseError {
     fn from(kind: BinaryPatchParseErrorKind) -> Self {
         Self { kind, span: None }
@@ -126,6 +205,8 @@ impl From<BinaryPatchParseErrorKind> for BinaryPatchParseError {
 #[non_exhaustive]
 pub(crate) enum BinaryPatchParseErrorKind {
     /// Missing or invalid "GIT binary patch" header.
+    // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81
+    #[cfg_attr(not(feature = "binary"), allow(dead_code))]
     InvalidHeader,
 
     /// First binary block (forward) not found.
@@ -135,10 +216,26 @@ pub(crate) enum BinaryPatchParseErrorKind {
     MissingReverseBlock,
 
     /// No binary data available (marker-only patch).
+    // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81
+    #[cfg_attr(not(feature = "binary"), allow(dead_code))]
     NoBinaryData,
 
     /// Invalid line length indicator in Base85 data.
+    // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81
+    #[cfg_attr(not(feature = "binary"), allow(dead_code))]
     InvalidLineLengthIndicator,
+
+    /// Base85 decoding failed.
+    #[cfg(feature = "binary")]
+    Base85(base85::Base85Error),
+
+    /// Delta application failed.
+    #[cfg(feature = "binary")]
+    Delta(delta::DeltaError),
+
+    /// Zlib decompression failed.
+    #[cfg(feature = "binary")]
+    DecompressionFailed(String),
 }
 
 impl fmt::Display for BinaryPatchParseErrorKind {
@@ -149,6 +246,12 @@ impl fmt::Display for BinaryPatchParseErrorKind {
             Self::MissingReverseBlock => write!(f, "second binary block not found"),
             Self::NoBinaryData => write!(f, "no binary data available"),
             Self::InvalidLineLengthIndicator => write!(f, "invalid line length indicator"),
+            #[cfg(feature = "binary")]
+            Self::Base85(e) => write!(f, "{e}"),
+            #[cfg(feature = "binary")]
+            Self::Delta(e) => write!(f, "{e}"),
+            #[cfg(feature = "binary")]
+            Self::DecompressionFailed(msg) => write!(f, "decompression failed: {msg}"),
         }
     }
 }
@@ -273,6 +376,62 @@ fn parse_binary_block<'a>(parser: &mut BinaryParser<'a>) -> Option<BinaryBlock<'
     })
 }
 
+/// Decodes multi-line Base85 data with length indicators.
+///
+/// Each line has the format: `<len_c><data>`
+///
+/// From [5.1.1. Binary Payloads](https://diffx.org/spec/binary-diffs.html#binary-payloads):
+///
+/// > Each line represents up to 52 bytes of pre-encoded data.
+/// > There may be an unlimited number of lines.
+/// > They contain the following fields:
+/// >
+/// >  * `len_c` is a line length character.
+/// >     This encodes the length of the (pre-encoded) data written on this line.
+/// >  * `data` is Base85-encoded data for this line.
+#[cfg(feature = "binary")]
+fn decode_base85_lines(data: &str) -> Result<Vec<u8>, BinaryPatchParseError> {
+    // A rough estimate: In Base85, 5 chars -> 4 bytes
+    let mut result = Vec::with_capacity(data.len() * 4 / 5);
+
+    for line in data.lines() {
+        if line.is_empty() {
+            continue;
+        }
+
+        let line_bytes = line.as_bytes();
+
+        let length = decode_line_length(line_bytes[0])
+            .ok_or(BinaryPatchParseErrorKind::InvalidLineLengthIndicator)?;
+        let encoded = &line[1..];
+        let start = result.len();
+        base85::decode_into(encoded, &mut result)?;
+        result.truncate(start + length);
+    }
+
+    Ok(result)
+}
+
+/// Decodes a line length character to its numeric value.
+///
+/// From [Line Length Characters](https://diffx.org/spec/binary-diffs.html#line-length-characters):
+///
+/// > Each encoded line in a binary diff payload is prefixed by a line length character.
+/// > This encodes the length of the compressed (but not encoded) data for the line.
+/// >
+/// > Line length characters always represent a value between 1 and 52:
+/// >
+/// > * A value of A-Z represents a number between 1..26.
+/// > * A value of a-z represents a number between 27..52.
+#[cfg(feature = "binary")]
+fn decode_line_length(c: u8) -> Option<usize> {
+    match c {
+        b'A'..=b'Z' => Some((c - b'A' + 1) as usize),
+        b'a'..=b'z' => Some((c - b'a' + 27) as usize),
+        _ => None,
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -350,3 +509,54 @@ mod tests {
         }
     }
 }
+
+#[cfg(test)]
+#[cfg(feature = "binary")]
+mod apply_tests {
+    use super::*;
+
+    #[test]
+    fn decode_line_length_uppercase() {
+        assert_eq!(decode_line_length(b'A'), Some(1));
+        assert_eq!(decode_line_length(b'B'), Some(2));
+        assert_eq!(decode_line_length(b'Z'), Some(26));
+    }
+
+    #[test]
+    fn decode_line_length_lowercase() {
+        assert_eq!(decode_line_length(b'a'), Some(27));
+        assert_eq!(decode_line_length(b'b'), Some(28));
+        assert_eq!(decode_line_length(b'z'), Some(52));
+    }
+
+    #[test]
+    fn decode_line_length_invalid() {
+        assert_eq!(decode_line_length(b'0'), None);
+        assert_eq!(decode_line_length(b'!'), None);
+        assert_eq!(decode_line_length(b' '), None);
+    }
+
+    #[test]
+    fn apply_literal_patch() {
+        let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\nliteral 0\nKcmV+b0RR6000031\n\n";
+        let (patch, _) = parse_binary_patch(input).unwrap();
+
+        let modified = patch.apply(&[]).unwrap();
+        assert_eq!(modified.len(), 10);
+        assert_eq!(modified, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+
+        let original = patch.apply_reverse(&[]).unwrap();
+        assert_eq!(original.len(), 0);
+    }
+
+    #[test]
+    fn apply_with_crlf_line_endings() {
+        let input = "GIT binary patch\r\nliteral 10\r\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\r\n\r\nliteral 0\r\nKcmV+b0RR6000031\r\n\r\n";
+        let (patch, _) = parse_binary_patch(input).unwrap();
+
+        let modified = patch.apply(&[]).unwrap();
+        assert_eq!(modified, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+        let original = patch.apply_reverse(&[]).unwrap();
+        assert_eq!(original.len(), 0);
+    }
+}

From 531e8d99191c933cd1fedd01c8f664ab5e31bc5b Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Sun, 12 Apr 2026 21:54:37 -0400
Subject: [PATCH 16/26] test: add binary patch to compat and replay tests

Now both tests require `binary` Cargo feature.
---
 .github/workflows/ci.yml                      |   1 +
 .github/workflows/replay.yml                  |   2 +-
 Cargo.toml                                    |   8 ++
 tests/compat/common.rs                        |  47 ++++++----
 .../git/binary_and_text_mixed/in/foo.patch    |  16 ++++
 .../git/binary_and_text_mixed/in/image.png    |   2 +
 .../git/binary_and_text_mixed/in/text.txt     |   1 +
 .../git/binary_and_text_mixed/out/image.png   | Bin 0 -> 10 bytes
 .../git/binary_and_text_mixed/out/text.txt    |   1 +
 tests/compat/git/binary_delta/in/foo.patch    |   9 ++
 tests/compat/git/binary_delta/in/large.bin    | Bin 0 -> 5120 bytes
 tests/compat/git/binary_delta/out/large.bin   | Bin 0 -> 5120 bytes
 .../binary_delta_wrong_original/in/foo.patch  |   9 ++
 .../binary_delta_wrong_original/in/large.bin  |   1 +
 tests/compat/git/binary_literal/in/foo.patch  |  10 ++
 tests/compat/git/binary_literal/out/small.bin | Bin 0 -> 10 bytes
 .../binary_literal_wrong_original/in/file.bin |   1 +
 .../in/foo.patch                              |   8 ++
 .../out/file.bin                              | Bin 0 -> 10 bytes
 .../binary_mixed_delta_literal/in/favicon.png | Bin 0 -> 2919 bytes
 .../binary_mixed_delta_literal/in/foo.patch   |  86 ++++++++++++++++++
 .../out/favicon.png                           | Bin 0 -> 1125 bytes
 tests/compat/git/mod.rs                       |  69 ++++++++++++++
 tests/compat/main.rs                          |  10 +-
 tests/replay.rs                               |  76 +++++++++++-----
 25 files changed, 309 insertions(+), 48 deletions(-)
 create mode 100644 tests/compat/git/binary_and_text_mixed/in/foo.patch
 create mode 100644 tests/compat/git/binary_and_text_mixed/in/image.png
 create mode 100644 tests/compat/git/binary_and_text_mixed/in/text.txt
 create mode 100644 tests/compat/git/binary_and_text_mixed/out/image.png
 create mode 100644 tests/compat/git/binary_and_text_mixed/out/text.txt
 create mode 100644 tests/compat/git/binary_delta/in/foo.patch
 create mode 100644 tests/compat/git/binary_delta/in/large.bin
 create mode 100644 tests/compat/git/binary_delta/out/large.bin
 create mode 100644 tests/compat/git/binary_delta_wrong_original/in/foo.patch
 create mode 100644 tests/compat/git/binary_delta_wrong_original/in/large.bin
 create mode 100644 tests/compat/git/binary_literal/in/foo.patch
 create mode 100644 tests/compat/git/binary_literal/out/small.bin
 create mode 100644 tests/compat/git/binary_literal_wrong_original/in/file.bin
 create mode 100644 tests/compat/git/binary_literal_wrong_original/in/foo.patch
 create mode 100644 tests/compat/git/binary_literal_wrong_original/out/file.bin
 create mode 100644 tests/compat/git/binary_mixed_delta_literal/in/favicon.png
 create mode 100644 tests/compat/git/binary_mixed_delta_literal/in/foo.patch
 create mode 100644 tests/compat/git/binary_mixed_delta_literal/out/favicon.png

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b1eb5a6..9bbd188 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,6 +30,7 @@ jobs:
       - run: rustup toolchain install ${{ matrix.rust }} --profile minimal
       - run: cargo +${{ matrix.rust }} check --all-targets --all-features
       - run: cargo +${{ matrix.rust }} test
+      - run: cargo +${{ matrix.rust }} test -F binary
 
   lint:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/replay.yml b/.github/workflows/replay.yml
index 08384d6..48368fc 100644
--- a/.github/workflows/replay.yml
+++ b/.github/workflows/replay.yml
@@ -67,7 +67,7 @@ jobs:
             exit 1
           fi
       - run: rustup toolchain install stable --profile minimal
-      - run: cargo test --release --test replay -- --ignored --nocapture
+      - run: cargo test --release --test replay -F binary -- --ignored --nocapture
         env:
           DIFFY_TEST_REPO: ${{ inputs.repo_url == '' && '.' || 'target/test-repo' }}
           DIFFY_TEST_COMMITS: ${{ inputs.commits }}
diff --git a/Cargo.toml b/Cargo.toml
index 68ce3ff..125a3c7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,3 +27,11 @@ snapbox = { version = "0.6.24", features = ["dir"] }
 [[example]]
 name = "patch_formatter"
 required-features = ["color"]
+
+[[test]]
+name = "compat"
+required-features = ["binary"]
+
+[[test]]
+name = "replay"
+required-features = ["binary"]
diff --git a/tests/compat/common.rs b/tests/compat/common.rs
index c55cdd1..8e5fdda 100644
--- a/tests/compat/common.rs
+++ b/tests/compat/common.rs
@@ -9,7 +9,7 @@ use std::{
 };
 
 use diffy::{
-    binary::BinaryPatch,
+    binary::{BinaryPatch, BinaryPatchParseError},
     patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError},
 };
 
@@ -268,6 +268,7 @@ fn print_patch_version() {
 pub enum TestError {
     Parse(PatchSetParseError),
     Apply(diffy::ApplyError),
+    Binary(BinaryPatchParseError),
 }
 
 impl std::fmt::Display for TestError {
@@ -275,6 +276,7 @@ impl std::fmt::Display for TestError {
         match self {
             TestError::Parse(e) => write!(f, "parse error: {e}"),
             TestError::Apply(e) => write!(f, "apply error: {e}"),
+            TestError::Binary(e) => write!(f, "binary patch error: {e}"),
         }
     }
 }
@@ -352,31 +354,40 @@ pub fn apply_diffy(
             }
         };
 
+        let read_original = || {
+            if let Some(name) = original_name {
+                let original_path = in_dir.join(name);
+                fs::read(&original_path).unwrap_or_default()
+            } else {
+                Vec::new()
+            }
+        };
+
+        let write_modified = |result: &[u8]| {
+            let result_path = output_dir.join(target_name);
+            if let Some(parent) = result_path.parent() {
+                fs::create_dir_all(parent).unwrap();
+            }
+            fs::write(&result_path, result).unwrap();
+        };
+
         match file_patch.patch() {
             PatchKind::Text(patch) => {
-                let original = if let Some(name) = original_name {
-                    let original_path = in_dir.join(name);
-                    fs::read_to_string(&original_path).unwrap_or_else(|e| {
-                        panic!("failed to read {}: {e}", original_path.display())
-                    })
-                } else {
-                    String::new()
-                };
+                let original = String::from_utf8(read_original()).unwrap();
 
                 let result = diffy::apply(&original, patch).map_err(TestError::Apply)?;
 
-                let result_path = output_dir.join(target_name);
-                if let Some(parent) = result_path.parent() {
-                    fs::create_dir_all(parent).unwrap();
-                }
-                fs::write(&result_path, result.as_bytes()).unwrap();
+                write_modified(result.as_bytes());
             }
             PatchKind::Binary(BinaryPatch::Marker) => {
-                // Dont do anything if it is just a binary patch marker.[
+                // Dont do anything if it is just a binary patch marker.
             }
-            PatchKind::Binary(_) => {
-                // Binary patch application requires the `binary` feature.
-                // Will be wired up when that feature is added.
+            PatchKind::Binary(patch) => {
+                let original = read_original();
+
+                let result = patch.apply(&original).map_err(TestError::Binary)?;
+
+                write_modified(&result);
             }
         }
     }
diff --git a/tests/compat/git/binary_and_text_mixed/in/foo.patch b/tests/compat/git/binary_and_text_mixed/in/foo.patch
new file mode 100644
index 0000000..9ae73cb
--- /dev/null
+++ b/tests/compat/git/binary_and_text_mixed/in/foo.patch
@@ -0,0 +1,16 @@
+diff --git a/image.png b/image.png
+index 0dd1608e45a9c4d35bfc1e6f266a796364aa8754..bccac03558b00545e7ea8ced4a3a1ee232cc185a 100644
+GIT binary patch
+literal 10
+UcmV+l0QLWgP)<h;3K|Lk0S%l1s{jB1
+
+literal 8
+ScmV+j0QdiiP)<h;3K|LuM**q;
+
+diff --git a/text.txt b/text.txt
+index 33194a0a6f3f99e366d606c24d9b1ab0e0086e69..b66ba06d315d46280bb09d54614cc52d1677809f 100644
+--- a/text.txt
++++ b/text.txt
+@@ -1 +1 @@
+-old content
++new content
diff --git a/tests/compat/git/binary_and_text_mixed/in/image.png b/tests/compat/git/binary_and_text_mixed/in/image.png
new file mode 100644
index 0000000..0dd1608
--- /dev/null
+++ b/tests/compat/git/binary_and_text_mixed/in/image.png
@@ -0,0 +1,2 @@
+�PNG
+
diff --git a/tests/compat/git/binary_and_text_mixed/in/text.txt b/tests/compat/git/binary_and_text_mixed/in/text.txt
new file mode 100644
index 0000000..33194a0
--- /dev/null
+++ b/tests/compat/git/binary_and_text_mixed/in/text.txt
@@ -0,0 +1 @@
+old content
diff --git a/tests/compat/git/binary_and_text_mixed/out/image.png b/tests/compat/git/binary_and_text_mixed/out/image.png
new file mode 100644
index 0000000000000000000000000000000000000000..bccac03558b00545e7ea8ced4a3a1ee232cc185a
GIT binary patch
literal 10
RcmeAS@N?(olHy`u1ON@30jmH2

literal 0
HcmV?d00001

diff --git a/tests/compat/git/binary_and_text_mixed/out/text.txt b/tests/compat/git/binary_and_text_mixed/out/text.txt
new file mode 100644
index 0000000..b66ba06
--- /dev/null
+++ b/tests/compat/git/binary_and_text_mixed/out/text.txt
@@ -0,0 +1 @@
+new content
diff --git a/tests/compat/git/binary_delta/in/foo.patch b/tests/compat/git/binary_delta/in/foo.patch
new file mode 100644
index 0000000..9abf334
--- /dev/null
+++ b/tests/compat/git/binary_delta/in/foo.patch
@@ -0,0 +1,9 @@
+diff --git a/large.bin b/large.bin
+index ffba9ca51637158e8f46f0a2a9014372778eeef4..a8a025a84f32c7552841b683361d03d3e171294d 100644
+GIT binary patch
+delta 15
+ZcmV+q0Pz2SD1a!CWCZ{J|NpUQm=imI2nhfH
+
+delta 15
+ZcmV+q0Pz2SD1a!CWCQ_9%OJ66m=h@q1w#M;
+
diff --git a/tests/compat/git/binary_delta/in/large.bin b/tests/compat/git/binary_delta/in/large.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ffba9ca51637158e8f46f0a2a9014372778eeef4
GIT binary patch
literal 5120
zcmV+b6#whcY*DIPbx_e&SH6(E=m?lE#2}*s*|N=_Pn6rew1;C{e>&C2V_V*#WK--S
zBVwa$Qc>CVe~2gp%FIc0GITmkOWRdRQEMmVnHk@)n0}jy)YRZ2E>S4v)yQrWOqy%C
zdgGx1O3NTlf2Us8A<bS{uNRx_Xe)J1CJ4n#y+6g=b&>}W>x>EtwIF$OTi6@1sv;(4
zSerdbxr+=xZ*CHL1$$Y~T$p4Ql#o<r<V%m$m8_q!2qK}&h%_pvZs*WnHGNq|n$5jf
zXW4-aL1%Iey8{He8{0yN)ze&fxwkbV@Dg#KXFgYt8i57$pa=WtyKh%pJqsUI6-5w+
z*j$M()a*NyEIRdMvkqI0-RaFFW<!tGUG%9^GK}+h{>nXq!jP!-<g9l}7|Ik+ZVT3T
zh#VbKXiIBSu6N9bv#T8%=Z<vZuT8os*D-Bhz3(E42>SBRm?3{^{)&p}f(rnh>tgKb
z0#OkH>rALj017{~{J53G%CxL`Dy*DE6{&tSlxro?<Fgg1(NcNIi%AwvTsIZHqcdDR
z&il|>3Fc|vT#w|asfzO_Ul0Z*R^kdE#G>fnm!7AlT4LsQiQA1JQJ$~VH7UW}xT0Ar
zR{}HtO#4SxnK5{FQyL3u3R?X$C5`2p`SRH;FOE2<Hi<N^bgbtFEnLbGo$kHK9<Q~Z
zsV-21RssG=-)j(giJk^F{tGWEmY`Q}Dt%ctR-o9azUebcA3j7yLYMxpYHJ1AXN~0=
z?BpktiN`(kk#J@CgS7MUAy7qRsGN_Qx$#G!8$OX!@7a_GI}o?V{dlDCx6bW1S*fj7
zqq&NCGuL9On110|-i!(f*j2iOS5ZLO>8UQeuoI4mJxbsBUgASe$R4tsvVn630le?D
z<SPI!>MN5q`%O)Rccq~)bw>wPs!fx~VfZcbPP?U9f!pLQO-(_^If@v&kSR@~n4asb
zmXL$%jpwQhAcB3){<r7bo?jar2%Qaxm%<PvEVG0Rc0Ymq#K5Te5UvY`oHkAT)uRqA
zafAb4^teHlA8YH(vz@z@z<JTnydh+(6K>9)QPqT5<?x^|3eeNWo<BOc(W%g{SYwl(
zXXfp`c-;AN8?TA9QA$X+3bzD)k&a|#;5WR0SETSv=v}D}6!>9E_@{H|Pi<s`0AIkH
ztvcKHb%<u}dL=nDC(`xj(n8nR(MnBZwHyH}SY5ZyY>g%{_3x0M6)!E_HFJ|=>M~Gs
zQCwuqC7jz*s7kk{zjyQJVA)-#xX>`FoNN*ekORq?UJHH;BJBw3Mf;-h6UTuqy@Eqi
zF?|buaQ}k5X>co?%e8nRuBJG-KO+wWbx5OFfGB-)y7VPv1|VYGJk4IMbQ@`b_{z{2
z`}fZoQxe{^$-dg+i{+4y5iPaFmsgx3D&Q7+7FiYU1Pk1#gOJ)x8yav9>4S>u7S0l`
z-Vx_v@G{J@^6tq(J(ec$wJ^ZJ1WV$gcCds}_O%9%zav3@9T?o3Q5o`OJ&rv|J_0ZQ
z^DS}&B2Gsod;)y2sZM-i5~?~@(3Uh_>qFkF<m%HFOn>@Uyf`Mno%hxhc$nG$xVyFc
zUec{06WnI{V8}@7YU~_h+{b3%Vy`4C>QJo1mF31Os_gZFR470hjHws7t??a}R`X^K
zQCB$+d#d5+MU)W1k>&uzMkTg(fFbR6B2mi=e?y+TQ!!?|0<f67Uh@<fK_y|R3tJYt
z3>K{A@=nf1uqr&(YQcYFI{L$hVD3E@DkNU>X@dKXf+j)=7vwc-Pbly&x0||iaQ~(g
zT3D#h6~k5`x}cK8E>9(1wK}^|z1}3x@G$ZL#?ZiTq7f?<*$VpXj>$IT2@~(WC*e*n
z6<R?~I+#EAI&~){&R*;eb&wrPAuBX=nY?0~bq~3GwG~<-^dagUz_}kiF+uVgRmMdH
zl+a#IeAS))VuPa|aPT?MwIQC&Pkz)K3}-c3L5u_mylEC8x8fLJuj8&ZK%aV$*~<gN
z2<QyKmVVh?FAeUtUq;)fBWMM*7zbkP+2^;Nvied9oftx<i!1B-LT5AI$L)pB2Ad}d
zt5Wcp)|$eI8Jpbjt+QJhq+h;3*h@DYuV&GD`5REsH2dQyr5WtlO$aW2#Zj4P?43go
z<shnZjsL1QpSdLaAh+q9a@D;~N-P@UtpZfHyEomJX0qG`wI=!@)&-3d+HXo{2rt<6
z>uQVCeQ7fJ4;7isICnMpoSba_2oesDirM%K0o?%a;BzmBtz0Qss(z24_d|aG6&O>(
z{{v2hi)5iN?FIK?bC{KeEMqie#(8jM^35J0t_|e}S}k%+S_jg`hVZ8j3K~APN>}Uj
zahvzpmhU>;KP|7vqkdCWTF?zBbcm%#ytAt2yNt46;WQWWy5*Bc2(a`Iogup=I97#c
zXy(B5W*9~tFC;N(1h>!{9^yoZXUVVMAy+-^fa~5|;3|e_VeU*DL<{drWd&w<W=!nv
zgmfq*U(LHMFk~WOm1!CJA}xS;uOr6d3{UdG1JA@rPX;dm<6y1EnT)0k2q^B^3~l}2
z@)ZhZb=H+Jem8vZsjQosxe?3<ys49%&)2Su+{Gn8k@(cFzRH$X&5Aqn6&<(%;#W$C
zr}>VMZ6(+M0E##EkDskklG00;4I<+wi1#G$v~Chc`qIg5%jF&Wk=wdmUJV-fw6~=9
zzqEZw&CTp+GA&E|5d*KnFkSsIYp&6(DXE4O?x{^Z(^ChNjqJIi*Z2NyrI(Y#?GMl7
zlps#EOUvXErQ-3$Fe!NXOv3QIwb<qu*;<r9NKnIJi>HToa>p0TXj$Bi^UoQjW!``l
z@Nrc%U^^a0l5iz*bAd54vC>TRD5k=TJc`vvVW8O0D#<~+Ia}{&W{^c`(3i)HHdKsU
zU-@hy$ReEvxLr9X7W@NG!%WQRq}=owID9M1Djm-dYu=KnvqU`hz}q;Mdv;aG{m3IA
zRz_PF5e;LJ-<fib?!IS-j=6u)<~y3|$~w{G6j#^RIMc0h1E9ksagc69m>s+9zZW0=
zq#jU81@MSGvD#o+?)NsE$J^Ih2qyV=2OyOlr{U%*Dw(`&vL45QUz_ASOl~K~gxt+Q
zEJlrYO(2=|J>G1+f*t>4As{B%(k+nE-{Nfh`T3Z*`3@a)2KB?!qNMC|;8Api+BrOg
z03oHz&!P#4Ur<+#2|Fvi?vQ$4V&<Ee`(U9^{a4$(v%c=WVmB2kj?y&Y`nFbrpw^sT
zZepH;ERVoJ7^vXzTH{b*!`=_-{vAaWq1W#zk-s({C}*g)tEO{(^~Su_!7uog#C9E;
z?GdA$!68BHN$+>0p3vpUe1Lp>h&|;m6HRaL8j>vu*`q4~2}K55!{sMXA)w1c`(ZPN
z)0$Xj_cxW@w!4(<HdHHFNJ!y8jov$=OekBS7Pj>ndA!@`?Rz6gMUbf7(|W?Cm#w0m
z$gP}{QKad(5UrzwZf{k$h&fT?f!r62RB}{;Wy7Xg8<coEi7gzsFCW3Ku+00ySDKKo
zvxn3%$KF+7DtM>pO-B}0irpZ&%kyAqY!^3=|MUtn1FR~~)u09I0)?;5O}LxXc#Aha
zQ;Dhl5?>aDy}9ynzvJzs#(3xekyylTY=eJTQ4ijc+|$YA?s=TvLGt?ai~6OrX9BBg
z*2GrN8@vK#m{xAO2OE^2;~*L}ed2Oz%_H^agmv_qHtn95$?vAU0als~BXv7P{H7q~
z2<L5&_7llv%=o(#H(48Kp;Fi!9mW^tieelarCpCB>U9;6SCO|q)i+@Uaz>Y(2hUZs
zq3z>{I7>;5A{7^<>kPvrN_{>av~k@7>fn*p;0UIFP`$*J)wlYxfgj+5A+wjiSa`@j
z)aP`e&u7gM@Y;3tm@@S=dJCQ-XftSRp(!Eg1EplGo?K~0ty&~6)sG(BVk}as!^~|5
z)l*wPiKfYxt&Y)yJF%`~1jNVIbam_2$Qz)w$@g5QpWg=Kp&1=P-o%d~&x}+JxO{Fh
zscLvTrX}s7qLQf4kb$!=mCw?+FVx1+2_t!r)+$Bb<0i}_wgyh|<r!mg;K`{UO#b98
z_=PrG)>?OR{bxG5Rk?u7ud`8|o{i$#bL5Y3j<mt_Yi8rG&<Yk(TcOzIS$7)ligcbW
zYst*<DJ&=M9n?IsqvO<~N|JUMJw)PC!d%!XR75ilWE;S0&}?4dP6gyBi#z@s!pS1h
zn#O4yIDe_I>1<?>$32rRr)&EnGpTMTtay?y<|&3IfTW@KcU$%3TN2DuT;OA<1VTsn
z9f-0y(t({nRKk~KEuZ)n;$=XJBxOeSFcXWWCLL6~_|CQqU!e(Vyj`su)OI=oyhdy=
z5ok1#IPois2V`h<dEsGk-=*<&LwRrDvYU}3aYkeIehuE}A^yM+O|^>upRGSYd*PoM
zj0!d^VO{X6y+y_!BEoO@#$MB<Sw}6IsyQk0VRBM-2DA%V?@*ZyfnN%k{f#3!Vbl09
zJRUMjzwCo@=N*wRTjF-(_8F6+-dvx}S`b$&Arkqguipx_#=-5Lh2pV{iqc;Q0Q^4-
zUN7v8uH6Y)+|qq0VA27dAeCtr<?fUftHsf)LQ*0OWFp(HE5@h-md`G+N~9YEAXz=i
z4e%ux&M(k(hUoNN%y2c=S%}AT?-p$!!&B{yki*;&^*bodiXRkDYz>ulI2|(`0lbt=
z#nzsXLKycOl?w#&=?Fo;6~}fBY$8h=(~35@IjB0iDaToC>ONJyv?0=M#8+uBLp+ww
z&t_ddbUe@@2s{fXlxh>ov*vBN+Dh$9IE*LBHj-&bUGi!y=?6*+knenfQP+7|Vsb=M
zU+U(mr-})lCx8z5sO?;EQeYTN0KoVKsklKQG=QLc6^gM?ArA<$(@MYgK#$H9Pmtdk
z<9C0eT~>x@MVEZ&G1U{rn&g><t*t*l?<?kR+_XI0=p^?<!@m$923TgMSiDhQ<)i}+
zA?hp_qrF2WMH8y@-Y|a{Xz6=@MF6i>Kwpe#X&^$TB+U&&G*a5keR)BvfqEM;#aApH
zVzoyFu=(TvSAJ5|Cy!+h_CGz1`NL7)7E6+o2Zbxo-MoFo<2?|_z9zpnIv%o1SuK7x
zaZ>=%cUu!X>59F3#I=Ib-4TL+Z9Y|BbGPMNg<Iy|woT5j`gUE^h4*S7Ng*_Ok_mz|
z1o+eNV%ossIp(vVVyR20+E@X|RdcSqyrLCP0bT9BBD5f)8p3kW*-hRt<a%Gwn7!;)
zt0DpM)DVmE21NoE?YC%UR%CLDJ-U{rm})0Qb&=4)^)+m6govhEo|cM-S*Cz=*I(EQ
z@aofa*h-U=>P)XA<!ynta!d7(F9l~c(!FFHl@++^U$&PtbW_rhC+ndIC>$T5eyR{1
z>QO4j!WobyV7xk0F)J6Ui{L+_E~=Q8w3gLUqfCZbIdPhFd>$87JNxB|Mv5o8Iq4rO
z9{uM_RA*6!3I`J<RFa3))YhAhRt2W{;W5LVJEe;yF?k9xHTxC|$ZED8Wb0@Wx^}E*
zf9Y$kT=XuCzE7Wk`xjwG++*3eMUk8o^go`K5GnX6pj+d4e$$p+?t&piD@Nb%yu=cu
z9%pzlWAw^YVGb*7OHCBdI3CYP9{D31dEgN(en)m%SHowC4*1E;d6k=jcqm6L0g!;O
zbcA_*3prsXssGG$8i%@(#6t@|8z90eIu|m~H`z6uw8F<sy_+D47=3)COSXQizP)|C
z2S_NJolm5inkJ?j`>w_kWtMLW>*hhC1-O$QaYTtBy~k{^0kqyjLZJ=_1q2wItMCx}
zA3EIUBEw)qU>R4wg)aJ(GDeCshXQr`T%IE;=yrYok~UNT7Y^&+;R)<AsBHsOpzS6i
z)QF_y#O?UQ{D$+`jQKvPe*~33X$@JY8aqT&9TH%+>bP`biKpV*weBQk2c(_sD5lif
zC2MKzR~%}yh(Grk3JHW^Bk<VlX^69vP`tlVwk61c@;^OAUhy&;!TE|r*Adt{t~3E#
zIAv*eW&BVGl@AeIKG>%D1o(^+!=<l=sOVA#<O$fXpvN&EC+NQY{gk6nkNci&6Uy{t
z%$mny(Zd9O_oJSicF6C-5b;m(Dk<X2R=5Vh_)dxLi*0Neei{53SIq{;GOMOK9>E_3
zn7S^}Y^)$5e_mFPiD=~qdIV{c+dm@#R?|PY@9c&yVusFF)+GyXOdnftGM0VqAdKHU
z5=+q3I^puAIyw-fn~`2DB7+5h`c{#<6w!xMi7&_BBDdz%F=EZjamcz!Z0B94s#7y8
z>wTX>=!?n)1WQYVq*-SbF`@#jNv32D)}VP8CLc6XQRI~w>GJb}452Hpgu&<5o8*VT
zi<$l^&c`DH03?v~g)O7Wv_2Ow$-or2_zy4@#|jLBaf1sQep142<WOnRf075$<1%h2
z2Q-)S7YYFHxT`^!Cxro7(O&S@E%!RF_~Uj|Ij1&m$au(fB-a7xI@P~0#Qj`k&xNzi
zG_m0x+$U#X@@kBpc?3GMB=sn7ursfP?#+zn&E;_tqv4Han-+I0Hzr^*VZTv_;nK=`
zIo7~;wWr}6*{m`*QB%j^27rf|bXsp++Y~tPEZ&}=i4+D7YGzLG$_g)hQTfZsYx)<1
z$cFL?5Eq1Md6sLIfDH+tK|)jwr;5*;5n7)fi!nT4mKzJTkjv#S)}l-%<>KTO`*v_x
z$N<Gh*;D=;Ql@o!id6floU9Z!{&;j4*nrD|n>=RsAzD5zwawueOGUJ?3jT}z<03j8
iFeF}$w^8qC1SDUCXp$k{m_!>&X-}m2V55dcq+r238RXvp

literal 0
HcmV?d00001

diff --git a/tests/compat/git/binary_delta/out/large.bin b/tests/compat/git/binary_delta/out/large.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8a025a84f32c7552841b683361d03d3e171294d
GIT binary patch
literal 5120
zcmV+b6#whcY*DIPbx_e&SH6(E=m?lE#2}*s*|N=_Pn6rew1;C{e>&C2V_V*#WK--S
zBVwa$Qc>CVe~2gp%FIc0GITmkOWRdRQEMmVnHk@)n0}jy)YRZ2E>S4v)yQrWOqy%C
zdgG!0|Ns9^f2Us8A<bS{uNRx_Xe)J1CJ4n#y+6g=b&>}W>x>EtwIF$OTi6@1sv;(4
zSerdbxr+=xZ*CHL1$$Y~T$p4Ql#o<r<V%m$m8_q!2qK}&h%_pvZs*WnHGNq|n$5jf
zXW4-aL1%Iey8{He8{0yN)ze&fxwkbV@Dg#KXFgYt8i57$pa=WtyKh%pJqsUI6-5w+
z*j$M()a*NyEIRdMvkqI0-RaFFW<!tGUG%9^GK}+h{>nXq!jP!-<g9l}7|Ik+ZVT3T
zh#VbKXiIBSu6N9bv#T8%=Z<vZuT8os*D-Bhz3(E42>SBRm?3{^{)&p}f(rnh>tgKb
z0#OkH>rALj017{~{J53G%CxL`Dy*DE6{&tSlxro?<Fgg1(NcNIi%AwvTsIZHqcdDR
z&il|>3Fc|vT#w|asfzO_Ul0Z*R^kdE#G>fnm!7AlT4LsQiQA1JQJ$~VH7UW}xT0Ar
zR{}HtO#4SxnK5{FQyL3u3R?X$C5`2p`SRH;FOE2<Hi<N^bgbtFEnLbGo$kHK9<Q~Z
zsV-21RssG=-)j(giJk^F{tGWEmY`Q}Dt%ctR-o9azUebcA3j7yLYMxpYHJ1AXN~0=
z?BpktiN`(kk#J@CgS7MUAy7qRsGN_Qx$#G!8$OX!@7a_GI}o?V{dlDCx6bW1S*fj7
zqq&NCGuL9On110|-i!(f*j2iOS5ZLO>8UQeuoI4mJxbsBUgASe$R4tsvVn630le?D
z<SPI!>MN5q`%O)Rccq~)bw>wPs!fx~VfZcbPP?U9f!pLQO-(_^If@v&kSR@~n4asb
zmXL$%jpwQhAcB3){<r7bo?jar2%Qaxm%<PvEVG0Rc0Ymq#K5Te5UvY`oHkAT)uRqA
zafAb4^teHlA8YH(vz@z@z<JTnydh+(6K>9)QPqT5<?x^|3eeNWo<BOc(W%g{SYwl(
zXXfp`c-;AN8?TA9QA$X+3bzD)k&a|#;5WR0SETSv=v}D}6!>9E_@{H|Pi<s`0AIkH
ztvcKHb%<u}dL=nDC(`xj(n8nR(MnBZwHyH}SY5ZyY>g%{_3x0M6)!E_HFJ|=>M~Gs
zQCwuqC7jz*s7kk{zjyQJVA)-#xX>`FoNN*ekORq?UJHH;BJBw3Mf;-h6UTuqy@Eqi
zF?|buaQ}k5X>co?%e8nRuBJG-KO+wWbx5OFfGB-)y7VPv1|VYGJk4IMbQ@`b_{z{2
z`}fZoQxe{^$-dg+i{+4y5iPaFmsgx3D&Q7+7FiYU1Pk1#gOJ)x8yav9>4S>u7S0l`
z-Vx_v@G{J@^6tq(J(ec$wJ^ZJ1WV$gcCds}_O%9%zav3@9T?o3Q5o`OJ&rv|J_0ZQ
z^DS}&B2Gsod;)y2sZM-i5~?~@(3Uh_>qFkF<m%HFOn>@Uyf`Mno%hxhc$nG$xVyFc
zUec{06WnI{V8}@7YU~_h+{b3%Vy`4C>QJo1mF31Os_gZFR470hjHws7t??a}R`X^K
zQCB$+d#d5+MU)W1k>&uzMkTg(fFbR6B2mi=e?y+TQ!!?|0<f67Uh@<fK_y|R3tJYt
z3>K{A@=nf1uqr&(YQcYFI{L$hVD3E@DkNU>X@dKXf+j)=7vwc-Pbly&x0||iaQ~(g
zT3D#h6~k5`x}cK8E>9(1wK}^|z1}3x@G$ZL#?ZiTq7f?<*$VpXj>$IT2@~(WC*e*n
z6<R?~I+#EAI&~){&R*;eb&wrPAuBX=nY?0~bq~3GwG~<-^dagUz_}kiF+uVgRmMdH
zl+a#IeAS))VuPa|aPT?MwIQC&Pkz)K3}-c3L5u_mylEC8x8fLJuj8&ZK%aV$*~<gN
z2<QyKmVVh?FAeUtUq;)fBWMM*7zbkP+2^;Nvied9oftx<i!1B-LT5AI$L)pB2Ad}d
zt5Wcp)|$eI8Jpbjt+QJhq+h;3*h@DYuV&GD`5REsH2dQyr5WtlO$aW2#Zj4P?43go
z<shnZjsL1QpSdLaAh+q9a@D;~N-P@UtpZfHyEomJX0qG`wI=!@)&-3d+HXo{2rt<6
z>uQVCeQ7fJ4;7isICnMpoSba_2oesDirM%K0o?%a;BzmBtz0Qss(z24_d|aG6&O>(
z{{v2hi)5iN?FIK?bC{KeEMqie#(8jM^35J0t_|e}S}k%+S_jg`hVZ8j3K~APN>}Uj
zahvzpmhU>;KP|7vqkdCWTF?zBbcm%#ytAt2yNt46;WQWWy5*Bc2(a`Iogup=I97#c
zXy(B5W*9~tFC;N(1h>!{9^yoZXUVVMAy+-^fa~5|;3|e_VeU*DL<{drWd&w<W=!nv
zgmfq*U(LHMFk~WOm1!CJA}xS;uOr6d3{UdG1JA@rPX;dm<6y1EnT)0k2q^B^3~l}2
z@)ZhZb=H+Jem8vZsjQosxe?3<ys49%&)2Su+{Gn8k@(cFzRH$X&5Aqn6&<(%;#W$C
zr}>VMZ6(+M0E##EkDskklG00;4I<+wi1#G$v~Chc`qIg5%jF&Wk=wdmUJV-fw6~=9
zzqEZw&CTp+GA&E|5d*KnFkSsIYp&6(DXE4O?x{^Z(^ChNjqJIi*Z2NyrI(Y#?GMl7
zlps#EOUvXErQ-3$Fe!NXOv3QIwb<qu*;<r9NKnIJi>HToa>p0TXj$Bi^UoQjW!``l
z@Nrc%U^^a0l5iz*bAd54vC>TRD5k=TJc`vvVW8O0D#<~+Ia}{&W{^c`(3i)HHdKsU
zU-@hy$ReEvxLr9X7W@NG!%WQRq}=owID9M1Djm-dYu=KnvqU`hz}q;Mdv;aG{m3IA
zRz_PF5e;LJ-<fib?!IS-j=6u)<~y3|$~w{G6j#^RIMc0h1E9ksagc69m>s+9zZW0=
zq#jU81@MSGvD#o+?)NsE$J^Ih2qyV=2OyOlr{U%*Dw(`&vL45QUz_ASOl~K~gxt+Q
zEJlrYO(2=|J>G1+f*t>4As{B%(k+nE-{Nfh`T3Z*`3@a)2KB?!qNMC|;8Api+BrOg
z03oHz&!P#4Ur<+#2|Fvi?vQ$4V&<Ee`(U9^{a4$(v%c=WVmB2kj?y&Y`nFbrpw^sT
zZepH;ERVoJ7^vXzTH{b*!`=_-{vAaWq1W#zk-s({C}*g)tEO{(^~Su_!7uog#C9E;
z?GdA$!68BHN$+>0p3vpUe1Lp>h&|;m6HRaL8j>vu*`q4~2}K55!{sMXA)w1c`(ZPN
z)0$Xj_cxW@w!4(<HdHHFNJ!y8jov$=OekBS7Pj>ndA!@`?Rz6gMUbf7(|W?Cm#w0m
z$gP}{QKad(5UrzwZf{k$h&fT?f!r62RB}{;Wy7Xg8<coEi7gzsFCW3Ku+00ySDKKo
zvxn3%$KF+7DtM>pO-B}0irpZ&%kyAqY!^3=|MUtn1FR~~)u09I0)?;5O}LxXc#Aha
zQ;Dhl5?>aDy}9ynzvJzs#(3xekyylTY=eJTQ4ijc+|$YA?s=TvLGt?ai~6OrX9BBg
z*2GrN8@vK#m{xAO2OE^2;~*L}ed2Oz%_H^agmv_qHtn95$?vAU0als~BXv7P{H7q~
z2<L5&_7llv%=o(#H(48Kp;Fi!9mW^tieelarCpCB>U9;6SCO|q)i+@Uaz>Y(2hUZs
zq3z>{I7>;5A{7^<>kPvrN_{>av~k@7>fn*p;0UIFP`$*J)wlYxfgj+5A+wjiSa`@j
z)aP`e&u7gM@Y;3tm@@S=dJCQ-XftSRp(!Eg1EplGo?K~0ty&~6)sG(BVk}as!^~|5
z)l*wPiKfYxt&Y)yJF%`~1jNVIbam_2$Qz)w$@g5QpWg=Kp&1=P-o%d~&x}+JxO{Fh
zscLvTrX}s7qLQf4kb$!=mCw?+FVx1+2_t!r)+$Bb<0i}_wgyh|<r!mg;K`{UO#b98
z_=PrG)>?OR{bxG5Rk?u7ud`8|o{i$#bL5Y3j<mt_Yi8rG&<Yk(TcOzIS$7)ligcbW
zYst*<DJ&=M9n?IsqvO<~N|JUMJw)PC!d%!XR75ilWE;S0&}?4dP6gyBi#z@s!pS1h
zn#O4yIDe_I>1<?>$32rRr)&EnGpTMTtay?y<|&3IfTW@KcU$%3TN2DuT;OA<1VTsn
z9f-0y(t({nRKk~KEuZ)n;$=XJBxOeSFcXWWCLL6~_|CQqU!e(Vyj`su)OI=oyhdy=
z5ok1#IPois2V`h<dEsGk-=*<&LwRrDvYU}3aYkeIehuE}A^yM+O|^>upRGSYd*PoM
zj0!d^VO{X6y+y_!BEoO@#$MB<Sw}6IsyQk0VRBM-2DA%V?@*ZyfnN%k{f#3!Vbl09
zJRUMjzwCo@=N*wRTjF-(_8F6+-dvx}S`b$&Arkqguipx_#=-5Lh2pV{iqc;Q0Q^4-
zUN7v8uH6Y)+|qq0VA27dAeCtr<?fUftHsf)LQ*0OWFp(HE5@h-md`G+N~9YEAXz=i
z4e%ux&M(k(hUoNN%y2c=S%}AT?-p$!!&B{yki*;&^*bodiXRkDYz>ulI2|(`0lbt=
z#nzsXLKycOl?w#&=?Fo;6~}fBY$8h=(~35@IjB0iDaToC>ONJyv?0=M#8+uBLp+ww
z&t_ddbUe@@2s{fXlxh>ov*vBN+Dh$9IE*LBHj-&bUGi!y=?6*+knenfQP+7|Vsb=M
zU+U(mr-})lCx8z5sO?;EQeYTN0KoVKsklKQG=QLc6^gM?ArA<$(@MYgK#$H9Pmtdk
z<9C0eT~>x@MVEZ&G1U{rn&g><t*t*l?<?kR+_XI0=p^?<!@m$923TgMSiDhQ<)i}+
zA?hp_qrF2WMH8y@-Y|a{Xz6=@MF6i>Kwpe#X&^$TB+U&&G*a5keR)BvfqEM;#aApH
zVzoyFu=(TvSAJ5|Cy!+h_CGz1`NL7)7E6+o2Zbxo-MoFo<2?|_z9zpnIv%o1SuK7x
zaZ>=%cUu!X>59F3#I=Ib-4TL+Z9Y|BbGPMNg<Iy|woT5j`gUE^h4*S7Ng*_Ok_mz|
z1o+eNV%ossIp(vVVyR20+E@X|RdcSqyrLCP0bT9BBD5f)8p3kW*-hRt<a%Gwn7!;)
zt0DpM)DVmE21NoE?YC%UR%CLDJ-U{rm})0Qb&=4)^)+m6govhEo|cM-S*Cz=*I(EQ
z@aofa*h-U=>P)XA<!ynta!d7(F9l~c(!FFHl@++^U$&PtbW_rhC+ndIC>$T5eyR{1
z>QO4j!WobyV7xk0F)J6Ui{L+_E~=Q8w3gLUqfCZbIdPhFd>$87JNxB|Mv5o8Iq4rO
z9{uM_RA*6!3I`J<RFa3))YhAhRt2W{;W5LVJEe;yF?k9xHTxC|$ZED8Wb0@Wx^}E*
zf9Y$kT=XuCzE7Wk`xjwG++*3eMUk8o^go`K5GnX6pj+d4e$$p+?t&piD@Nb%yu=cu
z9%pzlWAw^YVGb*7OHCBdI3CYP9{D31dEgN(en)m%SHowC4*1E;d6k=jcqm6L0g!;O
zbcA_*3prsXssGG$8i%@(#6t@|8z90eIu|m~H`z6uw8F<sy_+D47=3)COSXQizP)|C
z2S_NJolm5inkJ?j`>w_kWtMLW>*hhC1-O$QaYTtBy~k{^0kqyjLZJ=_1q2wItMCx}
zA3EIUBEw)qU>R4wg)aJ(GDeCshXQr`T%IE;=yrYok~UNT7Y^&+;R)<AsBHsOpzS6i
z)QF_y#O?UQ{D$+`jQKvPe*~33X$@JY8aqT&9TH%+>bP`biKpV*weBQk2c(_sD5lif
zC2MKzR~%}yh(Grk3JHW^Bk<VlX^69vP`tlVwk61c@;^OAUhy&;!TE|r*Adt{t~3E#
zIAv*eW&BVGl@AeIKG>%D1o(^+!=<l=sOVA#<O$fXpvN&EC+NQY{gk6nkNci&6Uy{t
z%$mny(Zd9O_oJSicF6C-5b;m(Dk<X2R=5Vh_)dxLi*0Neei{53SIq{;GOMOK9>E_3
zn7S^}Y^)$5e_mFPiD=~qdIV{c+dm@#R?|PY@9c&yVusFF)+GyXOdnftGM0VqAdKHU
z5=+q3I^puAIyw-fn~`2DB7+5h`c{#<6w!xMi7&_BBDdz%F=EZjamcz!Z0B94s#7y8
z>wTX>=!?n)1WQYVq*-SbF`@#jNv32D)}VP8CLc6XQRI~w>GJb}452Hpgu&<5o8*VT
zi<$l^&c`DH03?v~g)O7Wv_2Ow$-or2_zy4@#|jLBaf1sQep142<WOnRf075$<1%h2
z2Q-)S7YYFHxT`^!Cxro7(O&S@E%!RF_~Uj|Ij1&m$au(fB-a7xI@P~0#Qj`k&xNzi
zG_m0x+$U#X@@kBpc?3GMB=sn7ursfP?#+zn&E;_tqv4Han-+I0Hzr^*VZTv_;nK=`
zIo7~;wWr}6*{m`*QB%j^27rf|bXsp++Y~tPEZ&}=i4+D7YGzLG$_g)hQTfZsYx)<1
z$cFL?5Eq1Md6sLIfDH+tK|)jwr;5*;5n7)fi!nT4mKzJTkjv#S)}l-%<>KTO`*v_x
z$N<Gh*;D=;Ql@o!id6floU9Z!{&;j4*nrD|n>=RsAzD5zwawueOGUJ?3jT}z<03j8
iFeF}$w^8qC1SDUCXp$k{m_!>&X-}m2V55dcq+r3GG3TWK

literal 0
HcmV?d00001

diff --git a/tests/compat/git/binary_delta_wrong_original/in/foo.patch b/tests/compat/git/binary_delta_wrong_original/in/foo.patch
new file mode 100644
index 0000000..9abf334
--- /dev/null
+++ b/tests/compat/git/binary_delta_wrong_original/in/foo.patch
@@ -0,0 +1,9 @@
+diff --git a/large.bin b/large.bin
+index ffba9ca51637158e8f46f0a2a9014372778eeef4..a8a025a84f32c7552841b683361d03d3e171294d 100644
+GIT binary patch
+delta 15
+ZcmV+q0Pz2SD1a!CWCZ{J|NpUQm=imI2nhfH
+
+delta 15
+ZcmV+q0Pz2SD1a!CWCQ_9%OJ66m=h@q1w#M;
+
diff --git a/tests/compat/git/binary_delta_wrong_original/in/large.bin b/tests/compat/git/binary_delta_wrong_original/in/large.bin
new file mode 100644
index 0000000..0d6307b
--- /dev/null
+++ b/tests/compat/git/binary_delta_wrong_original/in/large.bin
@@ -0,0 +1 @@
+WRONG CONTENT
\ No newline at end of file
diff --git a/tests/compat/git/binary_literal/in/foo.patch b/tests/compat/git/binary_literal/in/foo.patch
new file mode 100644
index 0000000..61ea8bf
--- /dev/null
+++ b/tests/compat/git/binary_literal/in/foo.patch
@@ -0,0 +1,10 @@
+diff --git a/small.bin b/small.bin
+new file mode 100644
+index 0000000000000000000000000000000000000000..df93f5f3f72487244976c34e85525cf445016566
+GIT binary patch
+literal 10
+UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k
+
+literal 0
+KcmV+b0RR6000031
+
diff --git a/tests/compat/git/binary_literal/out/small.bin b/tests/compat/git/binary_literal/out/small.bin
new file mode 100644
index 0000000000000000000000000000000000000000..df93f5f3f72487244976c34e85525cf445016566
GIT binary patch
literal 10
RcmZQzWMXDvWn<^y1ONc904@Lk

literal 0
HcmV?d00001

diff --git a/tests/compat/git/binary_literal_wrong_original/in/file.bin b/tests/compat/git/binary_literal_wrong_original/in/file.bin
new file mode 100644
index 0000000..605cc40
--- /dev/null
+++ b/tests/compat/git/binary_literal_wrong_original/in/file.bin
@@ -0,0 +1 @@
+WRONG CONTENT - not matching the hash in patch
\ No newline at end of file
diff --git a/tests/compat/git/binary_literal_wrong_original/in/foo.patch b/tests/compat/git/binary_literal_wrong_original/in/foo.patch
new file mode 100644
index 0000000..d3e39d9
--- /dev/null
+++ b/tests/compat/git/binary_literal_wrong_original/in/foo.patch
@@ -0,0 +1,8 @@
+diff --git a/file.bin b/file.bin
+index df93f5f3f72487244976c34e85525cf445016566..a8a025a84f32c7552841b683361d03d3e171294d 100644
+GIT binary patch
+literal 10
+UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k
+
+literal 10
+UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k
diff --git a/tests/compat/git/binary_literal_wrong_original/out/file.bin b/tests/compat/git/binary_literal_wrong_original/out/file.bin
new file mode 100644
index 0000000000000000000000000000000000000000..df93f5f3f72487244976c34e85525cf445016566
GIT binary patch
literal 10
RcmZQzWMXDvWn<^y1ONc904@Lk

literal 0
HcmV?d00001

diff --git a/tests/compat/git/binary_mixed_delta_literal/in/favicon.png b/tests/compat/git/binary_mixed_delta_literal/in/favicon.png
new file mode 100644
index 0000000000000000000000000000000000000000..5109c1de8bea744180448b347fb44ddef23b90f4
GIT binary patch
literal 2919
zcmai030P8T7skEJu&Hdcv?0>65<zk$a0QhVGZEA@6X7EFR~ES-xa2ZTO;$5yTBerm
z)>vtoO=gysTc){dn_8A;mgY3Mwv7J;8=I#3|L3_2=X>9CzO#MjlI-o}Hd}k4HUfc|
zO?4;xz;Be|o}mdpp_rIq1VX(S^!1nc(>w`mA>W+E5rzWha=r-05eTA#T*P9B0}@my
z5C#fJ=%K5nXcWjHq5Z6AIGV@>;DPQjVt^jw<;#u<XWMbm4))qaIRQ4n2P7<%oF5^8
z2yzlyX_o;1RxD%DC?!P_PD1<ByiqPfF@Ul%w=&0}?X^)vF^5a=A-jH5h9wf3Cy|H<
zSgcGYGncJ47mCBM7It=aSR5XU$75gy28tF)SaOU2TB<NHWrGYrY%wU3fI<OEVUraq
zluAfwG>oG@8VBUyC+h;}WB70)unGigVUEN8lP&?d-=HgyFX$qkP$GnQLeX>ppKUP>
zf1wDc@9QdZmgs9_8ts46`TWnqfFzVCI1wL{`k7CEZvgp5ivX+-012gHHb9921d^r7
zOepd~@CM~Tgg+VN0|E%nE(yIF|1U7$8=#9YLMW!gO$Ct97D^pO<_J_iD-7@hITGHq
ztxxNofFSJOfWor~siL$D0SL!3wZBrw{}V<++u-4*QWRVX1$-5nt^+~o9ZkrAom;Kc
z`UFv3T)f3XE*Jr0kdK=)ib`>@wy?9d#^BAT0)dN0fOSD`G?^uVS$G^Chr!ukEO5RS
zHUvvN0cQn^1l*KAN;Z5JI4lY4|M3+ajUvJtG#bGjgd{?7v~p>x&FMholr$wqfXe<x
zp_El7u-J+uk<cQskRxRS9A#LrxhWJ9awRgB7;p-M3rs>gak(Iz&S(^TbignHAV!(e
zSYp&_3zQ<56`$+McmQno+fiaa`iNDWy}z9#V!sH3s3iR3u)=y%Yw!?&rwMj?oWR2L
zKmr8t6cWSZCPU1#K_FBTLAXF)6b6AXO_8QFtRk<UU+u7(SK&E-)~ub>a%~Izh4ror
z*EZ_|=QE<;c-&c(KbCDA2%Is!@H+g)rZQ&0D#{5Cz@kMoLfJM}sdZNl#oCX>4%v0a
znisL7DOS0$j+g^6mUBlNs`}n%hY?$wJRPq86v`{7)93jX?p_lVRnlJ&&afZLZ#&hz
zO*L;r_Yz7At=pnIS&x1(I<P39#>D65shfw2gS^Mv+f$LH_YO+OYq_KaMk=!7=N$s8
zYVb^P@k%r7@*~gE_dVG>kS=f8ngJ&BpPbVtC(o&BusyzO&arf-3>V3FCjyziw}vL}
z9Oh8PdKp&CrT}9PWUANt^pi0gLS2UocM;D-O-_!s-5S<7^r8#bN(^WZTqFPeacoP0
zy)NeEyBWEQyN`L_-b6@Dk~P_QYzX;bVSD#L!3h)D<2bbqiwl3s9^aQ+(V3xBlAVY}
z((ZcO3=N+xYIsemXzw|b7b4Ro^u(3l)Me_0bv4ljj>4afnIiW92!WV2PjRatG7sv*
zpP6bZ*~!;`Yi0U6&-pUA8W*JpZr^s@LEElz(fOFOSxl}IQgcO)DY)q9iK>b%>u&__
zvRF9OMNB(k`N6GOoqZPEZY5um`P|9V%DR}L>-`rGr4p1@kcTSTc%3vbz{xP`*qS+<
zRl?zRy}OoBI<{=+M)PZfMxH~$tT?`djmk)EW4fNbO1!E#X-UWeTiuEXf-vFTtUuHS
z%c>{aPOG#ca`;4~smG4gTVB7OEBF6ZZRp{SU~?`nB*f81&>>G<isc(k>@<k3ei}IE
zo<^O@ru*w{{Y&FX7z}23WTgAeL=n`7oHuWt;hLyj{rzB7bv3;bawk7h8BjmwXiV?f
z^q}gHyH0uIjV)TzH_vEy8X7$IzIs%e|Ned4zWw`O)TAz7`{ec0>IbC>r8?!JFg9}h
z32oc#$D||Zkr31zZ$H2E5}cLQ$@}*M)^!a&yv*?R?TI>OXd04|dAgFp%d+PC2!;8l
zk|J<;{INS(HnDoo>swFLhh7Tjp3`(h%_%E5>*@Eb%#s#0aL<X&UK%?(aHGKNW;aex
zPmj3yt>uc*=6Z?Q$bk9d*ai8|Oy~^8zMPyKV`^t-=c~-|`ud_RhdcWd7D=8|%sj<>
zA7A=zVZ6Jv>@X*rAYDtLP#zQjJCTOAsM><&`NGa(hEys=@*n<w=gbQ|9ZeHzSVJ}g
z1b@rS%A!xyGZHj)R9Bjs$}!aH-9OwN9eij))zHuYcVL4{4lqT_*JXRAJ}innZoEQY
zC->4N#$HYUv+miL+Q_QJePvpR)YMcigQ0iSd;4*>-39u=C2}WQ!>F$6s;U`Vok8)>
zNF&uFB_#(}ht)T>wN-X>bZBR7R4>U&`(V;9b#d9Esik%3RtbL3Y$H1{`GS946ZEnj
z)7IL0;<m*2m&+-<nJWJY4-ee)px5@ipp!sBrxb=4i@Ke2gLHT!-EUW&%!#_9zj$%Z
z_4saH<D*9nWo3zhtrrG^oFE9Q(kZ-rd6gj&zQTDt_jjX%Z|caV^ToryetuUc>h&yB
z^p~mghPtj97>Q&~mJ1n?k&&4}bb4yGLqc9k%6GjzJw+4wiJI<(-Hv-%0sGtYHon}`
z(C0xfVU=q#n9SUQ0v{5Iw72I~<j-OVy0&W#|3KZ3k6wzbCH>7NR3A6DYM+KY^*`<-
z<J9V|uHvk?cP-ZDj9-2|Gj1_Y!~L)M@fyKu7;3U3%8|ulxzeb|!`shx|Jhr6(0+4f
zO-+pkp3P>Hk2kfhb5Je4BJD-A+*@(Gwzg%9mTv3ayQk{wy-2a6X6N$1FCQN-$_u`h
zo18qe^Qvs!8RogQw9RQ1N3ymRCRN~cdV70oENMpeakk-FFJ8RhWafCb46iJ+^&ad^
zbP5f6i)%}2YFfYXikJcbK!CyS>ZjW3s>QKmc`-3D9aXiWUpTLR8`n#Y>~Ar~&P}d!
zh#PM#ivK`>$!Te6A(`0*94>bo)K}kh|2~z)S^~G|Br#^#G~juD(&ERuvQ~k5((c6I
z$LnlbhQ{cDfgV)q%*x8jil>pTZf*vKhCL51h1b>9$tRdzq4z!*>5mR~sTq8qYG(R0
z^v`Ab#-mp!`BolVW9>Fmp7j>C(^~?|`pkkF0tfqmbA6Ml6`Ne%l@YX&ivPz{iWfQ0
IIpoLx0#^I3DgXcg

literal 0
HcmV?d00001

diff --git a/tests/compat/git/binary_mixed_delta_literal/in/foo.patch b/tests/compat/git/binary_mixed_delta_literal/in/foo.patch
new file mode 100644
index 0000000..7f52479
--- /dev/null
+++ b/tests/compat/git/binary_mixed_delta_literal/in/foo.patch
@@ -0,0 +1,86 @@
+diff --git a/favicon.png b/favicon.png
+index 5109c1de8bea744180448b347fb44ddef23b90f4..69b8613ce1506e1c92b864f91cc46df06348c62b 100644
+GIT binary patch
+delta 1115
+zcmV-h1f=`t7Uc+#8Gi%-007x@vVQ;o00d`2O+f$vv5yP<VFdsH03c&XQcVB=dL{q>
+zfP?@5`Tzg`fam}Kbua(`>R<o>I+y?e7jT@qQ9J+u00Lr5M??VshmXv^00009a7bBm
+z000DZ000DZ0m8#+P5=M^2XskIMF-{q4Gj?pmKmEP0008eNq<B^R7l5_)lX;~brc8i
+zPqJ&%ZnWlLg|)3nu-5jHf(c5Y8}{JQ=AsewC=?}0X|aceco!A*(py0AAPA<O1g$}>
+zS*l<~QA$1JAY?<@4F!`%H{0#bjE9}fZuU?Ayt&Np_x{ZHeczk+=q00aIO{BOMm>NW
+zGb^R2YFbUrw129ilv&5_?R*#op%sCs<q50^6=I7P>1zv6w<_>da(*43C8z5GS?N}X
+z{^V`QxQx)j*!`A8b;E|$ExHA=4hUt88|nl<E=XYi=A>Q7tEk9hz=YqEbOPg@0Wz`s
+zmN!~(9dV=u?I~Mvh)g?mp_)jf?D@Nr{DQJb<hHOu=YKYd%MK!R&1MhRfBfQoTb@(2
+z2PIE#t40*u*ip1zPtjvIB@(%=V5AXXRwxp=a~~l-MLK38G}{P}vf`$5HvMVA)x`Lk
+z1q&8jYQgunlu0+7b5ltQ#uNp<*XN?kIG-5LX(qWI-<1uz>>GW45GWc;+ig_8yt0(H
+zchI)mFn`!(GAY|t|C#flvV9n{2WCVfKibO<1Qi=LY`EQ$z;UqGqDW*0&cq!s6My4;
+zPXgEE5_G3UA~k24J=gwQb;>^P#8wX*Y*kE)MBeQ{FfT7}!Y4_A1FwN^<FuyWL|oPR
+zjsVRnTuSb~M8;q7sVbLN^{eChP5459rxUkPpMPV{cYaR(zLS13=vffDVyn?erMvXr
+zTX?CfrX<xEC<XiPLElA7U2gX)kj7U6r)9Bbqr>c9)O_GgkHj~RdK~NC^L5i~IUXh_
+z95o@^6-U;DqjIVuk>ys^VPVp56?GT72DqTE5?dt$r77DZ?T}ZyX3UaF&vYMYG>7U(
+z=6{7=Hn2lG;1!X1AGZ(HW&;lyamBia;`62jOIGoOLtacMykp3iKdiS0Xh_|vJ_mY~
+zb4{&%v9!zfYAbcDwo>=i)=iqrbxKXm)PpS7rrhUx{~ymQhCvo@p6LJp03~!qSaf7z
+zbY(hYa%Ew3WdJfTGBPbNF)cAOR53F;F@HBYFgPnPFgh?WYdBZ^0000bbVXQnWMOn=
+zI&E)cX=Zr<GB7eSEif@HF*8&#GdeLhIxsjZFfckWFdAj=K>z>%8FWQhbW?9;ba!EL
+zWdL_~cP?peYja~^aAhuUa%Y?FJQ@H109SfcSaechcOY<bZ)Rz1Wgv8UaAhEAa8z_-
+z004NL<MIp$aaJfO%1_J8N##-i10xGAV+#`_6HAj60}~)bfHcEo10%CkAj{a)$i&RV
+h9K;99fJmT9E&%Em5p?oGHI)DW002ovPDHLkV1gh=-pv32
+
+literal 2919
+zcmV-t3z+nYP)<h;3K|Lk000e1NJLTq001BW001Be1^@s6b9#F8000EKX;fHrSWQei
+zV{dIPVPtP&WiEJaZ~y=R002BccyM83Yh`pGVr6G(ZapIJyT2kJX=FVjS2ayAO>jeH
+zXlY1#a%EF`PE=!hYhyWNB0oL~Ja{^IZE$U6bYUQPZES9HI(R)IVPtP&WjbziI&Eci
+zVJ{*ecsh7(aCB=uB3MmOAVY6*Wgs;!H7+nBJ_;Z_a%5&YQba}|cx`NMb2@TlW<4Tk
+zbaZe!FE4j@cP@7`E^l&YFEKeeIWI6WFETPMa%5&Lb9rubVR$WWb0Z=?3Lqdna%5&Y
+zL}hbha%pgMX>V>Ia%5&YVPbD}bUh*>3LqdLAb4$TZgV<icxh%mB4~7Ua5^t9ZgVbS
+zWN%_+E@N+PFJ*XXW-l==FfSqsARr(hcx`NMb2@ZsW@bGiXmoUNIxjD7b1q?IZ(?OG
+zV{dIQbZKU0FEK7KFCq#cARr)kZES9HI&f%jbZ>KLZ*V;#XmoUNIxjD7b1q?IZ(?OG
+zV{dIQaA<FIZ*ypGa4#_~FfSqsARr(hcx`NMb2@l!a6KYubaZe!FE4I$E@5PEVr4F4
+zZ*4DlVQ?=oE-)`53LqdLAb4$TZgV<#ZE#IZJtAmybZ|N^FK%-#VPtP&WiDfHZ7+CX
+za4#_~FfVOwFCq#cARr)kZES9HI&*YIc62==XmoUNIxjD7b1q?IZ(?OGV{dIQcwula
+zF)lDKb5wb7WiL`?b8mHWV`W8lWo~pMA_^cNAZ2)IW;#%5cx7x@L}_hhZgXjGZapG1
+zG9n5fARuLUX=XZ5X?SI9Swv}VWo~n6Z*DyzGcqCyARr)Rcxh%jLvL(va#L_&V`V)e
+zF(L{eARu&UW@b7`ZDD6+S7~H)Xgwk`G9n5fARu&UW@b7`ZDD6+Ol59obZ9*yGcqCy
+zARr)gX=Y|RQe|^*Y;|;LZ*Em?X>>gzG9n5fARu&UW@b8AQe|^*Y;|;LZ*DyzH!?0T
+zA_^cNAarSFW;$6?Wpi(Ab#!TOZapG5GA=M83LqdLaA<FIZ*ypGa5_V8Y;ST+Z)9aX
+zA~PZiARr)cXm50Hb7*gHI!QxAP;zf(X>4UZB6CtlLLf;+LpCuvHa0CXE-@ksARr)k
+zZE!kGZ)9m^c|>7!Wj!J?FfuSLFgGnRFjO%&Iy5pmFf}bOH##sdA_^cNAb4$XI!$GC
+zVPs)+VMJkcWj!J?FfuSLFgGnRFjO%&Iy5pmFf}bOH##sdB0dTrARs(=ZE#IZI!I}A
+zbZ>HbJ_;ZpARs()WM(>3WpO?VARr(hAUtwpW;$$X3LqdLARr)fbVYV_I$>jUX>V>l
+zB5-nVWOZX@WFiV6ARr(hAais@c62&(Z)S9NVRB_bXJu}5Jt9G7W@&C|ba@~|Wpim~
+zZe?;HC{1B<ATu!_GB7eQDIy9WARr(hAais@c62&-Xk~6aA~G;CFfA}QEio`uF*iCi
+zGCD9dEigAaFfbx7J_;ZpARs(1a%5&YQ)O{J3LqdLJTG`{a7|4*NNIC)Z*qA)3Lqdn
+zFLGpNIz(l2V{&P5bZKvHJ_;Z_FLGpNI#NVNJ_<Z9csh7(aBXFDVLl2xKX`CqV{2t}
+zAZ2c3JtA@<KR%gqXg2@=0f1>kLr`;4M?xS;MME|*IW{&eGA=Oy04R}lk<TkcQ545N
+zGibz26blQBNy7>;GO<xc8cNa_Ww4of^GnSeulJrLTV<;#vGHrcU%*ybD@CMaWnoQh
+z&~u(QHIvM_oqIm_d+#~jdoH+Rc||p<dq6FidNdOD&&KEco@YGN(M&(SgkosJQ&SUE
+zw2wPncD(7bYgIn?uhpm0h61iy;UPuSP2n-&i4{|`d0V(y$t6<4r^21OTu}Jd){_pu
+z*qn9v%Vs?m9RYWvus>VI$ud@Qx+*L$C!pq+mEwKumw3~KnQ4h_;;k4&i4exmIHQaZ
+zqL)FLLv#_OTUJ!a@A=K2WJz)rnKf3?StLsilTJ~WrFvRoM)b6dJgav9|Mu0$^aY*j
+z-Z0cWS=L)Sc(&ks)3QDfE$jOTsvhB@P|9CAfPr_>H%nSz9~#!-?6RaKci?;jS{}57
+zp7@oj#NC+;yq<tB4o%lkH}CYUxX<xD#Mb1iu3&c$I^=xA!VgvzXWr3G?4bYv010qN
+zS#tmY3ljhU3ljkVnw%H_00hTLL_t(og~gXoOcZw%$3L@mn99mVicN?GvqGXmEPARz
+z4hAWvC}|s^q{hU9QNdn}F&gjQJd~s~{YTOSW2|Y2HFzPK=s`?1Ho8?0Q2uBj0bG=_
+zt{}KwzCG;hWSJe@g_`u6ykutH`@Mg^_a^Vb((xJ4Xg_}od~H8B0rkt!mlZw%egblV
+zAz&6b55#ObZ-4s>@B{6@$N$x+0n`AdZ9EPrONx`oPL8d^SdIhl+lpQ;W@unwKsRpO
+zc#|V9Z5{cG8F|O!TM*a<%mH87Q8-w?emyNME%f#E;q&>pbLS2d6B8Uec8uxiX~xIL
+zQ^j*gMBf4I1foDBQ{-RT#+^tE!_dKl2X*u2P0i2GOOo#2zpwoKe3h1#>dBKQO5*Y3
+z$J)PtzYN20XJZcd;!TDP;60!s^<W?nP*+!%yf8I2rP<k8Pu}g@w^dYB<aVMQC}5GA
+zsKr%in>KAyZ*Q-sfow1`GNR3!H+vQ6&eW(=@9S?8tp-3K5WupmOjqW>G);Ew*g<1s
+zBYXDj!LlsE;V=~y6+C_VG%5Dd&U7sG>@MJY7O&<dQ&m-!rlzJON!{Ju?wa_{&bWi0
+z-RXZDbmGJbr&j!azxx_~NOi!UWS%;8N<~FQ+OlPf4jecj%d*lGZ{508b#-+PZQHiZ
+ztLhaD1|>;n&z@CjX{nl<n^XDDReuHy0xBpdcr8iCk000e?b{_u1qB6ZiZ5Qgm{xEn
+zPo7MZ+qiL~Bq<mSs<pLMl4My{%;xt4^(%mtzy|=6lappuRTZI7h@(f3vKU^xctJQE
+zcHYOv#t4N%X>x}S9Ri@dyqq&<&d}A>#oXN73Y%R7yqlE$CFylTLxTnf2c5{gGLQ`&
+zIdVj?Sj^FL>((vR)YPQPbR><pk!T44YHMp-klN)qux{NtMIw>3Yr1gZLaJO0X!YUx
+zE2XA(xfmWEroMjRXwcu^&%nTdThy`bTtRblbH)s`x3?=C4l5iEYiMXl`}Xbg%tR;@
+za`aSJSG)Pn6*Q3;CIo2j-n|(eaCbp88dY&|ahhCHQ<I)Qf3A^{5#{9MxcRSu21vBw
+zK$2lume#Ia>yXdqOH;gI!v<AWR;seHQawFAlH~4jhjMdsy^K3;n9PFT>u@%7>Cz=h
+zQb$L}QqQgg@YxQ01I)WCWGUFadpE0Bug+F{9%uzf{uF)%UIQ&ji9jGgVPRoLO-Z9`
+z)~w<3<;$EteVWP1$ptcFV`D_4(bP{r0>8O+<p9&zDay;s(}M>O)YsRi&dyFfd-g1?
+zuNTAk__(fKy{d_c3EjJQPpej~N=<!~#Gspow1LDh4CUqJ$uvzJK72T92kPqTWSXW-
+z)ATC9I8g27fI(th`Y_dj(a}*|xpGBiWo20zkH_N*1OmEy_pW++dfdj#aE)Z{srVI%
+zap}_}FA|B&KYaMmxPJXQWo2cQl$2nao*86jW`=X;&e7S~Nnv3j-QC^)JbLsfS?I5U
+z9^emm)NTZUkAPm_PdC%=_fuP2OG!xy#l^+M<8ekuM;RO(<l41s%*@Pq`BqM1*!>+C
+z_VnHZ85ff*DC1)CCKx34yZ>!9ONDzmeMRQwwANG8TfpZmT+dtW=Vh(u{{qBcL;Juw
+R3Jw4O002ovPDHLkV1icrt||Zk
+
diff --git a/tests/compat/git/binary_mixed_delta_literal/out/favicon.png b/tests/compat/git/binary_mixed_delta_literal/out/favicon.png
new file mode 100644
index 0000000000000000000000000000000000000000..69b8613ce1506e1c92b864f91cc46df06348c62b
GIT binary patch
literal 1125
zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE0wix1Z>k4UEa{HEjtmSN`?>!lvVtU&J%W50
z7^>757#dm_7=8hT8eT9klo~KFyh>nTu$sZZAYL$MSD+10f+@+{-G$+Qd;gjJKptm-
zM`SV3z!DH<JajnI4=Bi9;_2(k{)~~ASCC_x<ZNZ2rVE}fjv*e$r(X5XkSP^m|KPtV
z`*PlvX9=xa*LrNY_H9xVr`JO1Zx1d$TO{~JLqydxb7OmJg}CU~OVJJ=6gZa6V_ECC
zHgZ*hYoM3y6NMD#TfD53-OO*lJ=fDd_e|b5|Nnb-o_YWM-<j{V_h)~9p}M%p;<ZkZ
zn{9)PvG&pxSy8@cx2#$;CGz;Yw>1*13q>1OJmp-&A)3@J_A;A0V7u0bkfJ)-`Krrb
zGey1(YybBo_r#8#3kPrfo#tA4xb3R$F4j$a9H~9huUPE$JQDstTM~O>^@<b82U_k=
zE@65+m(ghB?`h^y1+s-pSl?<!7k2nqmoA*;vE<Fq-IM+_ZSr`sZG+=;v(D3eN+oC0
z_^;Rhc~l!cFL(>Ps(;>UH<3Fx0=LBZUre@8723HjVToWun1;~KVtGY7SF1E7liM5<
zHa%KaZ1y+v;MF5PX0dXM#bh1)zI}?P`JCclPp)GktoyD%Uv%1HzQp-VwViVJr}FN4
zBVAi3pdsabJ2zzio=sD>mtWX++u%m3k>>5t|1&=?+*B*EnLW)#$^O=9J{D1Fvz#4w
zCmkr<aD9<S3fs9OvIfHSk8Avyu0IhfS?;Q2nm)_+`oHMXQ+Dr<gz-z~g!**5+<WU_
zps$~QNZqrMd3|I4<1Nb`xWruj*~f5JE5_^HK9^(hA6IHk5Bs|M@h{&WP7TY2w+GHo
zK387n_ix|iI-?hn0=tq{U-VeI`^&xPij}LEsa%!NVEz5x@xH5fY~FY63CF^jmZxq^
zTYUIU{FRyqzWqnc`>SML-}_v8Imc2?O<l%R@wTY@wU)(2tCS`_4ZE6{cqw1BRD2iP
zjs@$4qE(rfYTj0UJ0W~`+L=k7=S$=>P1;|%KWmLM+u&^dKy+fI{C57UY0UhRg-3U_
zKl;3k)jRBCi*uZh#-8L8Gwj!FXV37syULEeYD%&1+S-j<PJheNE?u)N==;@czOzo3
zdikEc(yViR*`4QA|K-nXw>gUC&wB|>?y4oO5hW>!C8<`)MX5lF!N|bKNY}tn*U&h`
z(Adh*+{(a0+rYrez#!Wq{4a`z-29Zxv`X9>q*C7l^C^QQ$cEtjw370~qEv?R@^Zb*
zyzJuS#DY}4{G#;P?`))iio&ZxB1(c1%M}WW^3yVNQWZ)n3sMy_3rdn17%JvG{=~yk
z7^b0d%K!8k&!<5Q%*xz)$=t%q!rqfbn1vNw8cYtSFe`5kQ8<0$%84Uqj>sHgKi%N5
cz)O$emAGKZCnwXXKr0wLUHx3vIVCg!0EmFw6951J

literal 0
HcmV?d00001

diff --git a/tests/compat/git/mod.rs b/tests/compat/git/mod.rs
index e7dc99b..25df395 100644
--- a/tests/compat/git/mod.rs
+++ b/tests/compat/git/mod.rs
@@ -125,6 +125,75 @@ fn junk_between_hunks() {
         .run();
 }
 
+// Mixed binary and text patch.
+//
+// Both git apply and diffy should apply both the binary and text changes.
+#[test]
+fn binary_and_text_mixed() {
+    Case::git("binary_and_text_mixed").strip(1).run();
+}
+
+// Binary patch in literal format (new file creation).
+#[test]
+#[cfg(feature = "binary")]
+fn binary_literal() {
+    Case::git("binary_literal").strip(1).run();
+}
+
+// Binary patch in delta format (modify existing file).
+#[test]
+#[cfg(feature = "binary")]
+fn binary_delta() {
+    Case::git("binary_delta").strip(1).run();
+}
+
+// Binary literal patch applied to wrong original content.
+//
+// This documents a behavioral difference:
+// - diffy: succeeds (skips validation, ignores original for literal format)
+// - git: fails (validates original content via index hash before applying)
+//
+// diffy's behavior is intentional - we don't have access to git's object database
+// to verify hashes, and for literal format the original content isn't needed anyway.
+#[test]
+#[cfg(feature = "binary")]
+fn binary_literal_wrong_original() {
+    Case::git("binary_literal_wrong_original")
+        .strip(1)
+        .expect_compat(false)
+        .run();
+}
+
+// Binary delta patch applied to wrong original content.
+//
+// Both diffy and git fail, but for different reasons:
+// - diffy: fails because delta instructions reference wrong offsets/sizes
+// - git: fails because index hash doesn't match before even trying to apply
+//
+// This test verifies diffy correctly rejects invalid delta applications.
+#[test]
+#[cfg(feature = "binary")]
+fn binary_delta_wrong_original() {
+    Case::git("binary_delta_wrong_original")
+        .strip(1)
+        .expect_success(false)
+        .run();
+}
+
+// Binary patch with mixed delta/literal format.
+//
+// Git can choose different encodings for forward and reverse transformations
+// based on which is more efficient. This patch has:
+// - forward (original -> modified): delta
+// - reverse (modified -> original): literal
+//
+// From rust-lang/rust@ad46af24 (favicon-32x32.png update).
+#[test]
+#[cfg(feature = "binary")]
+fn binary_mixed_delta_literal() {
+    Case::git("binary_mixed_delta_literal").strip(1).run();
+}
+
 // Multi-file patch with junk/preamble text between different files.
 //
 // git apply behavior: Ignores content between `diff --git` boundaries.
diff --git a/tests/compat/main.rs b/tests/compat/main.rs
index e35ed07..2c0afe7 100644
--- a/tests/compat/main.rs
+++ b/tests/compat/main.rs
@@ -20,25 +20,25 @@
 //!
 //! ```sh
 //! # Run all compat tests
-//! cargo test --test compat
+//! cargo test --test compat -F binary
 //!
 //! # Run with reference tool comparison (CI mode)
-//! CI=1 cargo test --test compat
+//! CI=1 cargo test --test compat -F binary
 //!
 //! # For Nix users, run this to ensure you have GNU patch
-//! CI=1 nix shell nixpkgs#gnupatch -c cargo test --test compat
+//! CI=1 nix shell nixpkgs#gnupatch -c cargo test --test compat -F binary
 //! ```
 //!
 //! ## Regenerating snapshots
 //!
 //! ```sh
-//! SNAPSHOTS=overwrite cargo test --test compat
+//! SNAPSHOTS=overwrite cargo test --test compat -F binary
 //! ```
 //!
 //! ## Adding new test cases
 //!
 //! 1. Create `case_name/in/` with input file(s) and `foo.patch`
-//! 2. Run `SNAPSHOTS=overwrite cargo test --test compat` to generate `out/`
+//! 2. Run `SNAPSHOTS=overwrite cargo test --test compat -F binary` to generate `out/`
 //! 3. Add `#[test] fn case_name() { Case::{gnu_patch,git}(...).run(); }` in the module
 //!
 //! For failure tests, use `.expect_success(false)` and skip step 2.
diff --git a/tests/replay.rs b/tests/replay.rs
index 4996151..8a50065 100644
--- a/tests/replay.rs
+++ b/tests/replay.rs
@@ -6,7 +6,7 @@
 //! ## Usage
 //!
 //! ```console
-//! $ cargo test --test replay -- --ignored --nocapture
+//! $ cargo test --test replay -F binary -- --ignored --nocapture
 //! ```
 //!
 //! ## Environment Variables
@@ -59,7 +59,10 @@ use std::{
     sync::Mutex,
 };
 
-use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet};
+use diffy::{
+    binary::BinaryPatch,
+    patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet},
+};
 use rayon::prelude::*;
 
 /// Persistent `git cat-file --batch` process for fast object lookups.
@@ -325,13 +328,10 @@ fn process_commit(
 
     // UniDiff format cannot express pure renames (no ---/+++ headers).
     // Use `--no-renames` to represent them as delete + create instead.
-    // GitDiff mode handles renames via extended headers natively.
+    // GitDiff mode uses `--binary` to get actual binary patch data.
     let diff_output = match mode {
         TestMode::UniDiff => git(repo, &["diff", "--no-renames", parent, child]),
-        // TODO: pass `--binary` once binary patch support lands,
-        // so binary files get actual delta/literal data instead of
-        // "Binary files differ" markers.
-        TestMode::GitDiff => git(repo, &["diff", parent, child]),
+        TestMode::GitDiff => git(repo, &["diff", "--binary", parent, child]),
     };
 
     if diff_output.is_empty() {
@@ -381,14 +381,7 @@ fn process_commit(
             text_files + type_changes
         }
         TestMode::GitDiff => {
-            // Can't use `--numstat` for GitDiff: it shows `-\t-\t` for both
-            // actual binary diffs AND pure binary renames (100% similarity).
-            // Parser correctly handles pure renames (rename headers, no binary content).
-            //
-            // Use `--raw` for total count, subtract actual binary markers from diff.
-            //
-            // TODO: once `--binary` is passed above, count ALL `--raw`
-            // entries — every file will have patch data (delta, literal, or text).
+            // With `--binary`, all files including binary ones have patch data.
             let raw = git(repo, &["diff", "--raw", parent, child]);
             let (mut total, mut type_changes) = (0, 0);
             for line in raw.lines().filter(|l| !l.is_empty()) {
@@ -397,12 +390,7 @@ fn process_commit(
                     type_changes += 1;
                 }
             }
-            let binary = diff_output
-                .lines()
-                .filter(|l| l.starts_with("Binary files ") || l.starts_with("GIT binary patch"))
-                .count();
-            skipped += binary;
-            total - binary + type_changes
+            total + type_changes
         }
     };
 
@@ -512,10 +500,50 @@ fn process_commit(
                     );
                 }
             }
-            PatchKind::Binary(_) => {
-                // Binary patch application not yet wired up in replay tests.
-                // Will be done once the `binary` Cargo feature is added.
+            PatchKind::Binary(BinaryPatch::Marker) => {
+                // `Binary files differ` marker - no actual data to apply
                 skipped += 1;
+                continue;
+            }
+            PatchKind::Binary(patch) => {
+                // Get content as bytes
+                let base_content = if let Some(path) = base_path {
+                    let Some(content) = cat.get(parent, path) else {
+                        skipped += 1;
+                        continue;
+                    };
+                    content
+                } else {
+                    Vec::new()
+                };
+
+                let expected_content = if let Some(path) = target_path {
+                    let Some(content) = cat.get(child, path) else {
+                        skipped += 1;
+                        continue;
+                    };
+                    content
+                } else {
+                    Vec::new()
+                };
+
+                let result = match patch.apply(&base_content) {
+                    Ok(r) => r,
+                    Err(e) => {
+                        panic!(
+                            "Failed to apply binary patch at {parent_short}..{child_short} for {desc}: {e}"
+                        );
+                    }
+                };
+
+                if result != expected_content {
+                    panic!(
+                        "Binary content mismatch at {parent_short}..{child_short} for {desc}\n\n\
+                        Expected {} bytes, got {} bytes",
+                        expected_content.len(),
+                        result.len()
+                    );
+                }
             }
         }
 

From c255bbf9ae7cae1a18022531575d71c030362cc2 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Mon, 13 Apr 2026 19:58:59 -0400
Subject: [PATCH 17/26] refactor(patch_set): make free helpers generic over
 Text

Preparation for `PatchSet::parse_bytes(&[u8])` support.

No behavior change.
---
 src/patch_set/parse.rs | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index b089479..5e6aa5e 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -6,7 +6,7 @@ use super::{
 };
 use crate::binary::{parse_binary_patch, BinaryPatch};
 use crate::patch::parse::parse_one;
-use crate::utils::escaped_filename;
+use crate::utils::{escaped_filename, Text};
 use crate::Patch;
 
 use std::borrow::Cow;
@@ -284,14 +284,13 @@ impl<'a> Iterator for PatchSet<'a> {
 ///
 /// A patch header starts with `--- ` or `+++ ` (the file path lines).
 /// Returns `None` if no header is found.
-fn find_patch_start(input: &str) -> Option<usize> {
+fn find_patch_start<T: Text + ?Sized>(input: &T) -> Option<usize> {
     let mut offset = 0;
     for line in input.lines() {
         if line.starts_with(ORIGINAL_PREFIX) || line.starts_with(MODIFIED_PREFIX) {
             return Some(offset);
         }
         offset += line.len();
-        offset += line_ending_len(&input[offset..]);
     }
     None
 }
@@ -311,27 +310,29 @@ fn find_patch_start(input: &str) -> Option<usize> {
 /// > The log message and the patch are separated by a line with a three-dash line.
 ///
 /// [`git format-patch`]: https://git-scm.com/docs/git-format-patch
-fn strip_email_preamble(input: &str) -> &str {
+fn strip_email_preamble<T: Text + ?Sized>(input: &T) -> &T {
     // only strip preamble for mbox-formatted input
     if !input.starts_with("From ") {
         return input;
     }
 
     match input.find(EMAIL_PREAMBLE_SEPARATOR) {
-        Some(pos) => &input[pos + EMAIL_PREAMBLE_SEPARATOR.len()..],
+        Some(pos) => {
+            let (_, rest) = input.split_at(pos + EMAIL_PREAMBLE_SEPARATOR.len());
+            rest
+        }
         None => input,
     }
 }
 
 /// Finds the byte offset of the first `diff --git` line in `input`.
-fn find_gitdiff_start(input: &str) -> Option<usize> {
+fn find_gitdiff_start<T: Text + ?Sized>(input: &T) -> Option<usize> {
     let mut offset = 0;
     for line in input.lines() {
         if line.starts_with("diff --git ") {
             return Some(offset);
         }
         offset += line.len();
-        offset += line_ending_len(&input[offset..]);
     }
     None
 }

From 8f5c1d1198478f66279fdffabec3bc32c4abc8f6 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Mon, 13 Apr 2026 20:27:26 -0400
Subject: [PATCH 18/26] refactor(patch_set): make GitHeader::parse generic over
 Text

Assumption: Header lines are always ASCII
(with `core.quotePath=true`, git's default).
---
 src/patch_set/parse.rs | 58 ++++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index 5e6aa5e..78d8611 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -401,42 +401,51 @@ impl<'a> GitHeader<'a> {
     /// or the next `diff --git`).
     ///
     /// Returns the parsed header and the number of bytes consumed.
-    fn parse(input: &'a str) -> (Self, usize) {
+    ///
+    /// Assumption: Header lines are always ASCII
+    /// (with `core.quotePath=true`, git's default).
+    fn parse<T: Text + ?Sized>(input: &'a T) -> (Self, usize) {
         let mut header = GitHeader::default();
         let mut consumed = 0;
 
         for line in input.lines() {
-            if let Some(rest) = line.strip_prefix("diff --git ") {
+            let trimmed = strip_line_ending(line);
+            let Some(s) = trimmed.as_str() else {
+                // Non-UTF-8 line — not a recognized header
+                break;
+            };
+
+            if let Some(rest) = s.strip_prefix("diff --git ") {
                 // Only accept the first `diff --git` line.
                 // A second one means we've reached the next patch.
                 if header.diff_git_line.is_some() {
                     break;
                 }
                 header.diff_git_line = Some(rest);
-            } else if let Some(path) = line.strip_prefix("rename from ") {
+            } else if let Some(path) = s.strip_prefix("rename from ") {
                 header.rename_from = Some(path);
-            } else if let Some(path) = line.strip_prefix("rename to ") {
+            } else if let Some(path) = s.strip_prefix("rename to ") {
                 header.rename_to = Some(path);
-            } else if let Some(path) = line.strip_prefix("copy from ") {
+            } else if let Some(path) = s.strip_prefix("copy from ") {
                 header.copy_from = Some(path);
-            } else if let Some(path) = line.strip_prefix("copy to ") {
+            } else if let Some(path) = s.strip_prefix("copy to ") {
                 header.copy_to = Some(path);
-            } else if let Some(mode) = line.strip_prefix("old mode ") {
+            } else if let Some(mode) = s.strip_prefix("old mode ") {
                 header.old_mode = Some(mode);
-            } else if let Some(mode) = line.strip_prefix("new mode ") {
+            } else if let Some(mode) = s.strip_prefix("new mode ") {
                 header.new_mode = Some(mode);
-            } else if let Some(mode) = line.strip_prefix("new file mode ") {
+            } else if let Some(mode) = s.strip_prefix("new file mode ") {
                 header.new_file_mode = Some(mode);
-            } else if let Some(mode) = line.strip_prefix("deleted file mode ") {
+            } else if let Some(mode) = s.strip_prefix("deleted file mode ") {
                 header.deleted_file_mode = Some(mode);
-            } else if line.starts_with("index ")
-                || line.starts_with("similarity index ")
-                || line.starts_with("dissimilarity index ")
+            } else if s.starts_with("index ")
+                || s.starts_with("similarity index ")
+                || s.starts_with("dissimilarity index ")
             {
                 // Recognized but nothing to extract.
-            } else if line.starts_with("Binary files ") {
+            } else if s.starts_with("Binary files ") {
                 header.is_binary_marker = true;
-            } else if line.starts_with("GIT binary patch") {
+            } else if s.starts_with("GIT binary patch") {
                 header.binary_patch_offset = Some(consumed);
             } else {
                 // Unrecognized line: End of extended headers
@@ -445,7 +454,6 @@ impl<'a> GitHeader<'a> {
             }
 
             consumed += line.len();
-            consumed += line_ending_len(&input[consumed..]);
         }
 
         (header, consumed)
@@ -751,16 +759,12 @@ pub(crate) fn extract_file_op_unidiff<'a>(
     }
 }
 
-/// Returns the length of the line ending at the start of `s`.
+/// Strips the trailing `\n` from a line yielded by [`Text::lines`].
 ///
-/// `.lines()` strips line endings, so callers tracking byte offsets need to
-/// account for the `\r\n` or `\n` that was consumed.
-fn line_ending_len(s: &str) -> usize {
-    if s.starts_with("\r\n") {
-        2
-    } else if s.starts_with('\n') {
-        1
-    } else {
-        0
-    }
+/// [`Text::lines`] includes line endings; strip for matching.
+fn strip_line_ending<T: Text + ?Sized>(line: &T) -> &T {
+    // TODO: GNU patch strips trailing CRs from CRLF patches automatically.
+    // We should consider adding compat tests for GNU patch.
+    // And `git apply` seems to reject. Worth adding tests as well.
+    line.strip_suffix("\n").unwrap_or(line)
 }

From 79b3804e3baf5ad2a0acd25e2ec785fe54900d70 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Mon, 13 Apr 2026 21:02:48 -0400
Subject: [PATCH 19/26] refactor(patch_set): extract `PatchSet` methods to free
 fns

Move dispatch logic to free functions,
so we can have private trait bound on them.

* `next_gitdiff_patch`
* `next_unidiff_patch`
* `Iterator::next` -> `next_patch`
---
 src/patch_set/parse.rs | 368 ++++++++++++++++++++---------------------
 1 file changed, 183 insertions(+), 185 deletions(-)

diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index 78d8611..fc62424 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -69,215 +69,208 @@ impl<'a> PatchSet<'a> {
             found_any: false,
         }
     }
+}
 
-    /// Creates an error with the current offset as span.
-    fn error(&self, kind: PatchSetParseErrorKind) -> PatchSetParseError {
-        PatchSetParseError::new(kind, self.offset..self.offset)
-    }
+impl<'a> Iterator for PatchSet<'a> {
+    type Item = Result<FilePatch<'a, str>, PatchSetParseError>;
 
-    fn next_gitdiff_patch(&mut self) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
-        let remaining = &self.input[self.offset..];
-        let patch_start = find_gitdiff_start(remaining)?;
-        self.offset += patch_start;
-        self.found_any = true;
+    fn next(&mut self) -> Option<Self::Item> {
+        next_patch(self)
+    }
+}
 
-        let abs_patch_start = self.offset;
+fn next_patch<'a>(ps: &mut PatchSet<'a>) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
+    if ps.finished {
+        return None;
+    }
 
-        // Parse extended headers incrementally — stops at first unrecognized line
-        let (header, header_consumed) = GitHeader::parse(&self.input[self.offset..]);
-        self.offset += header_consumed;
+    let result = match ps.opts.format {
+        Format::UniDiff => next_unidiff_patch(ps),
+        Format::GitDiff => next_gitdiff_patch(ps),
+    };
 
-        // Handle "Binary files ... differ" (no patch data)
-        if header.is_binary_marker {
-            match self.opts.binary {
-                Binary::Skip => {
-                    return self.next_gitdiff_patch();
-                }
-                Binary::Fail => {
-                    let path = header.diff_git_line.unwrap_or("<unknown>").to_owned();
-                    return Some(Err(PatchSetParseError::new(
-                        PatchSetParseErrorKind::BinaryNotSupported { path },
-                        abs_patch_start..abs_patch_start,
-                    )));
-                }
-                Binary::Keep => {
-                    // FIXME: error spans point at `diff --git` line, not the specific offending line
-                    let operation = match extract_file_op_binary(&header) {
-                        Ok(op) => op,
-                        Err(mut e) => {
-                            e.set_span(abs_patch_start..abs_patch_start);
-                            return Some(Err(e));
-                        }
-                    };
-                    let (old_mode, new_mode) = match parse_file_modes(&header) {
-                        Ok(modes) => modes,
-                        Err(mut e) => {
-                            e.set_span(abs_patch_start..abs_patch_start);
-                            return Some(Err(e));
-                        }
-                    };
-                    return Some(Ok(FilePatch::new_binary(
-                        operation,
-                        BinaryPatch::Marker,
-                        old_mode,
-                        new_mode,
-                    )));
-                }
-            }
+    if result.is_none() {
+        ps.finished = true;
+        if !ps.found_any {
+            let err = PatchSetParseError::new(
+                PatchSetParseErrorKind::NoPatchesFound,
+                ps.offset..ps.offset,
+            );
+            return Some(Err(err));
         }
+    }
 
-        // Handle "GIT binary patch" (has patch data)
-        if let Some(binary_patch_start) = header.binary_patch_offset {
-            match self.opts.binary {
-                Binary::Skip => {
-                    return self.next_gitdiff_patch();
-                }
-                Binary::Fail => {
-                    let path = header.diff_git_line.unwrap_or("<unknown>").to_owned();
-                    return Some(Err(PatchSetParseError::new(
-                        PatchSetParseErrorKind::BinaryNotSupported { path },
-                        abs_patch_start..abs_patch_start,
-                    )));
-                }
-                Binary::Keep => {
-                    // GitHeader::parse consumed the marker line but not the payload.
-                    // Use the recorded offset to pass input from the marker onward.
-                    let binary_input = &self.input[abs_patch_start + binary_patch_start..];
-                    let (binary_patch, consumed) = match parse_binary_patch(binary_input) {
-                        Ok(result) => result,
-                        Err(e) => return Some(Err(e.into())),
-                    };
-                    self.offset = abs_patch_start + binary_patch_start + consumed;
-
-                    // FIXME: error spans point at `diff --git` line, not the specific offending line
-                    let operation = match extract_file_op_binary(&header) {
-                        Ok(op) => op,
-                        Err(mut e) => {
-                            e.set_span(abs_patch_start..abs_patch_start);
-                            return Some(Err(e));
-                        }
-                    };
-                    let (old_mode, new_mode) = match parse_file_modes(&header) {
-                        Ok(modes) => modes,
-                        Err(mut e) => {
-                            e.set_span(abs_patch_start..abs_patch_start);
-                            return Some(Err(e));
-                        }
-                    };
-                    return Some(Ok(FilePatch::new_binary(
-                        operation,
-                        binary_patch,
-                        old_mode,
-                        new_mode,
-                    )));
-                }
-            }
-        }
+    result
+}
 
-        // `git diff` output format is stricter.
-        // There is no preamble between Git headers and unidiff patch portion,
-        // so we safely don't perform the preamble skipping.
-        //
-        // If we did, it would fail the pure rename/mode-change operation
-        // since those ops have no unidiff patch portion
-        // and is directly followed by the next `diff --git` header.
-        let opts = crate::patch::parse::ParseOpts::default().no_skip_preamble();
-        let remaining = &self.input[self.offset..];
-        let (result, consumed) = parse_one(remaining, opts);
-        self.offset += consumed;
-        let patch = match result {
-            Ok(patch) => patch,
-            Err(e) => return Some(Err(e.into())),
-        };
+fn next_gitdiff_patch<'a>(
+    ps: &mut PatchSet<'a>,
+) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
+    let patch_start = find_gitdiff_start(remaining(ps))?;
+    ps.offset += patch_start;
+    ps.found_any = true;
 
-        // FIXME: error spans point at `diff --git` line, not the specific offending line
-        let operation = match extract_file_op_gitdiff(&header, &patch) {
-            Ok(op) => op,
-            Err(mut e) => {
-                e.set_span(abs_patch_start..abs_patch_start);
-                return Some(Err(e));
-            }
-        };
+    let abs_patch_start = ps.offset;
 
-        // FIXME: error spans point at `diff --git` line, not the specific offending line
-        let (old_mode, new_mode) = match parse_file_modes(&header) {
-            Ok(modes) => modes,
-            Err(mut e) => {
-                e.set_span(abs_patch_start..abs_patch_start);
-                return Some(Err(e));
-            }
-        };
+    // Parse extended headers incrementally — stops at first unrecognized line
+    let (header, header_consumed) = GitHeader::parse(remaining(ps));
+    ps.offset += header_consumed;
 
-        Some(Ok(FilePatch::new(operation, patch, old_mode, new_mode)))
+    // Handle "Binary files ... differ" (no patch data)
+    if header.is_binary_marker {
+        match ps.opts.binary {
+            Binary::Skip => {
+                return next_gitdiff_patch(ps);
+            }
+            Binary::Fail => {
+                let path = header.diff_git_line.unwrap_or("<unknown>").to_owned();
+                return Some(Err(PatchSetParseError::new(
+                    PatchSetParseErrorKind::BinaryNotSupported { path },
+                    abs_patch_start..abs_patch_start,
+                )));
+            }
+            Binary::Keep => {
+                // FIXME: error spans point at `diff --git` line, not the specific offending line
+                let operation = match extract_file_op_binary(&header) {
+                    Ok(op) => op,
+                    Err(mut e) => {
+                        e.set_span(abs_patch_start..abs_patch_start);
+                        return Some(Err(e));
+                    }
+                };
+                let (old_mode, new_mode) = match parse_file_modes(&header) {
+                    Ok(modes) => modes,
+                    Err(mut e) => {
+                        e.set_span(abs_patch_start..abs_patch_start);
+                        return Some(Err(e));
+                    }
+                };
+                return Some(Ok(FilePatch::new_binary(
+                    operation,
+                    BinaryPatch::Marker,
+                    old_mode,
+                    new_mode,
+                )));
+            }
+        }
     }
 
-    fn next_unidiff_patch(&mut self) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
-        let remaining = &self.input[self.offset..];
-        if remaining.is_empty() {
-            return None;
+    // Handle "GIT binary patch" (has patch data)
+    if let Some(binary_patch_start) = header.binary_patch_offset {
+        match ps.opts.binary {
+            Binary::Skip => {
+                return next_gitdiff_patch(ps);
+            }
+            Binary::Fail => {
+                let path = header.diff_git_line.unwrap_or("<unknown>").to_owned();
+                return Some(Err(PatchSetParseError::new(
+                    PatchSetParseErrorKind::BinaryNotSupported { path },
+                    abs_patch_start..abs_patch_start,
+                )));
+            }
+            Binary::Keep => {
+                // GitHeader::parse consumed the marker line but not the payload.
+                // Use the recorded offset to pass input from the marker onward.
+                let binary_input = &ps.input[abs_patch_start + binary_patch_start..];
+                let (binary_patch, consumed) = match parse_binary_patch(binary_input) {
+                    Ok(result) => result,
+                    Err(e) => return Some(Err(e.into())),
+                };
+                ps.offset = abs_patch_start + binary_patch_start + consumed;
+
+                // FIXME: error spans point at `diff --git` line, not the specific offending line
+                let operation = match extract_file_op_binary(&header) {
+                    Ok(op) => op,
+                    Err(mut e) => {
+                        e.set_span(abs_patch_start..abs_patch_start);
+                        return Some(Err(e));
+                    }
+                };
+                let (old_mode, new_mode) = match parse_file_modes(&header) {
+                    Ok(modes) => modes,
+                    Err(mut e) => {
+                        e.set_span(abs_patch_start..abs_patch_start);
+                        return Some(Err(e));
+                    }
+                };
+                return Some(Ok(FilePatch::new_binary(
+                    operation,
+                    binary_patch,
+                    old_mode,
+                    new_mode,
+                )));
+            }
         }
+    }
 
-        let patch_start = find_patch_start(remaining)?;
-        self.found_any = true;
+    // `git diff` output format is stricter.
+    // There is no preamble between Git headers and unidiff patch portion,
+    // so we safely don't perform the preamble skipping.
+    //
+    // If we did, it would fail the pure rename/mode-change operation
+    // since those ops have no unidiff patch portion
+    // and is directly followed by the next `diff --git` header.
+    let opts = crate::patch::parse::ParseOpts::default().no_skip_preamble();
+    let (result, consumed) = parse_one(remaining(ps), opts);
+    ps.offset += consumed;
+    let patch = match result {
+        Ok(patch) => patch,
+        Err(e) => return Some(Err(e.into())),
+    };
 
-        let patch_input = &remaining[patch_start..];
+    // FIXME: error spans point at `diff --git` line, not the specific offending line
+    let operation = match extract_file_op_gitdiff(&header, &patch) {
+        Ok(op) => op,
+        Err(mut e) => {
+            e.set_span(abs_patch_start..abs_patch_start);
+            return Some(Err(e));
+        }
+    };
 
-        let opts = crate::patch::parse::ParseOpts::default();
-        let (result, consumed) = parse_one(patch_input, opts);
-        // Always advance so the iterator makes progress even on error.
-        let abs_patch_start = self.offset + patch_start;
-        self.offset += patch_start + consumed;
+    // FIXME: error spans point at `diff --git` line, not the specific offending line
+    let (old_mode, new_mode) = match parse_file_modes(&header) {
+        Ok(modes) => modes,
+        Err(mut e) => {
+            e.set_span(abs_patch_start..abs_patch_start);
+            return Some(Err(e));
+        }
+    };
 
-        let patch = match result {
-            Ok(patch) => patch,
-            Err(e) => return Some(Err(e.into())),
-        };
-        let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path())
-        {
-            Ok(op) => op,
-            Err(mut e) => {
-                e.set_span(abs_patch_start..abs_patch_start);
-                return Some(Err(e));
-            }
-        };
+    Some(Ok(FilePatch::new(operation, patch, old_mode, new_mode)))
+}
 
-        Some(Ok(FilePatch::new(operation, patch, None, None)))
+fn next_unidiff_patch<'a>(
+    ps: &mut PatchSet<'a>,
+) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
+    let remaining = remaining(ps);
+    if remaining.is_empty() {
+        return None;
     }
-}
 
-impl<'a> Iterator for PatchSet<'a> {
-    type Item = Result<FilePatch<'a, str>, PatchSetParseError>;
+    let patch_start = find_patch_start(remaining)?;
+    ps.found_any = true;
 
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.finished {
-            return None;
-        }
+    let patch_input = &remaining[patch_start..];
 
-        let result = match self.opts.format {
-            Format::UniDiff => {
-                let result = self.next_unidiff_patch();
-                if result.is_none() {
-                    self.finished = true;
-                    if !self.found_any {
-                        return Some(Err(self.error(PatchSetParseErrorKind::NoPatchesFound)));
-                    }
-                }
-                result
-            }
-            Format::GitDiff => {
-                let result = self.next_gitdiff_patch();
-                if result.is_none() {
-                    self.finished = true;
-                    if !self.found_any {
-                        return Some(Err(self.error(PatchSetParseErrorKind::NoPatchesFound)));
-                    }
-                }
-                result
-            }
-        };
+    let opts = crate::patch::parse::ParseOpts::default();
+    let (result, consumed) = parse_one(patch_input, opts);
+    // Always advance so the iterator makes progress even on error.
+    let abs_patch_start = ps.offset + patch_start;
+    ps.offset += patch_start + consumed;
 
-        result
-    }
+    let patch = match result {
+        Ok(patch) => patch,
+        Err(e) => return Some(Err(e.into())),
+    };
+    let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path()) {
+        Ok(op) => op,
+        Err(mut e) => {
+            e.set_span(abs_patch_start..abs_patch_start);
+            return Some(Err(e));
+        }
+    };
+
+    Some(Ok(FilePatch::new(operation, patch, None, None)))
 }
 
 /// Finds the byte offset of the first patch header in the input.
@@ -768,3 +761,8 @@ fn strip_line_ending<T: Text + ?Sized>(line: &T) -> &T {
     // And `git apply` seems to reject. Worth adding tests as well.
     line.strip_suffix("\n").unwrap_or(line)
 }
+
+fn remaining<'a>(ps: &PatchSet<'a>) -> &'a str {
+    let (_, rest) = ps.input.split_at(ps.offset);
+    rest
+}

From 33b4e38be48491297b3b7a129e3f62b97f80cc7c Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Tue, 14 Apr 2026 01:33:51 -0400
Subject: [PATCH 20/26] refactor: add `Text::as_str_prefix` method

Returns the longest valid UTF-8 prefix of the input.

This will be used to safely convert binary patch data to
`&str` without validating the entire remaining input.
---
 src/utils.rs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/utils.rs b/src/utils.rs
index 4e98c95..6047050 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -144,6 +144,11 @@ pub trait Text: Eq + Hash + ToOwned {
     fn find(&self, needle: &str) -> Option<usize>;
     fn split_at(&self, mid: usize) -> (&Self, &Self);
     fn as_str(&self) -> Option<&str>;
+    /// Returns the longest valid UTF-8 prefix.
+    ///
+    /// For `str` this is the entire input.
+    /// For `[u8]` this returns up to the first invalid UTF-8 byte.
+    fn as_str_prefix(&self) -> &str;
     fn as_bytes(&self) -> &[u8];
     #[allow(unused)]
     fn lines(&self) -> LineIter<'_, Self>;
@@ -198,6 +203,10 @@ impl Text for str {
         Some(self)
     }
 
+    fn as_str_prefix(&self) -> &str {
+        self
+    }
+
     fn as_bytes(&self) -> &[u8] {
         self.as_bytes()
     }
@@ -255,6 +264,13 @@ impl Text for [u8] {
         std::str::from_utf8(self).ok()
     }
 
+    fn as_str_prefix(&self) -> &str {
+        match std::str::from_utf8(self) {
+            Ok(s) => s,
+            Err(e) => std::str::from_utf8(&self[..e.valid_up_to()]).unwrap(),
+        }
+    }
+
     fn as_bytes(&self) -> &[u8] {
         self
     }

From e50b8de7b3671ab18fe6e083771e290125cd9f8a Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Tue, 14 Apr 2026 00:00:52 -0400
Subject: [PATCH 21/26] refactor(patch_set): make `PatchSet` internal generic
 over `T: Text`

We have the assumption that file path from hunk hader is UTF-8
(This is actually not true but we can leave it for future fix)

Only the `str` constructor and Iterator impl are exposed for now.
`parse_bytes` support comes in a follow-up commit.
---
 src/patch_set/error.rs |  4 +++
 src/patch_set/parse.rs | 76 ++++++++++++++++++++++++++----------------
 2 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/src/patch_set/error.rs b/src/patch_set/error.rs
index 20d7d3e..77ccd0c 100644
--- a/src/patch_set/error.rs
+++ b/src/patch_set/error.rs
@@ -78,6 +78,9 @@ pub(crate) enum PatchSetParseErrorKind {
     /// Invalid `diff --git` path.
     InvalidDiffGitPath,
 
+    /// File path contains invalid UTF-8.
+    InvalidUtf8Path,
+
     /// Binary diff not supported in current configuration.
     BinaryNotSupported { path: String },
 
@@ -96,6 +99,7 @@ impl fmt::Display for PatchSetParseErrorKind {
             Self::CreateMissingModifiedPath => write!(f, "create patch has no modified path"),
             Self::InvalidFileMode(mode) => write!(f, "invalid file mode: {mode}"),
             Self::InvalidDiffGitPath => write!(f, "invalid diff --git path"),
+            Self::InvalidUtf8Path => write!(f, "file path is not valid UTF-8"),
             Self::BinaryNotSupported { path } => {
                 write!(f, "binary diff not supported: {path}")
             }
diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index fc62424..c021f02 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -48,15 +48,15 @@ const EMAIL_PREAMBLE_SEPARATOR: &str = "\n---\n";
 ///     println!("{:?}", patch.operation());
 /// }
 /// ```
-pub struct PatchSet<'a> {
-    input: &'a str,
+pub struct PatchSet<'a, T: ?Sized = str> {
+    input: &'a T,
     offset: usize,
     opts: ParseOptions,
     finished: bool,
     found_any: bool,
 }
 
-impl<'a> PatchSet<'a> {
+impl<'a> PatchSet<'a, str> {
     /// Creates a streaming parser for multiple file patches.
     pub fn parse(input: &'a str, opts: ParseOptions) -> Self {
         // Strip email preamble once at construction
@@ -71,7 +71,7 @@ impl<'a> PatchSet<'a> {
     }
 }
 
-impl<'a> Iterator for PatchSet<'a> {
+impl<'a> Iterator for PatchSet<'a, str> {
     type Item = Result<FilePatch<'a, str>, PatchSetParseError>;
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -79,7 +79,9 @@ impl<'a> Iterator for PatchSet<'a> {
     }
 }
 
-fn next_patch<'a>(ps: &mut PatchSet<'a>) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
+fn next_patch<'a, T: Text + ?Sized>(
+    ps: &mut PatchSet<'a, T>,
+) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {
     if ps.finished {
         return None;
     }
@@ -103,9 +105,9 @@ fn next_patch<'a>(ps: &mut PatchSet<'a>) -> Option<Result<FilePatch<'a, str>, Pa
     result
 }
 
-fn next_gitdiff_patch<'a>(
-    ps: &mut PatchSet<'a>,
-) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
+fn next_gitdiff_patch<'a, T: Text + ?Sized>(
+    ps: &mut PatchSet<'a, T>,
+) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {
     let patch_start = find_gitdiff_start(remaining(ps))?;
     ps.offset += patch_start;
     ps.found_any = true;
@@ -171,7 +173,9 @@ fn next_gitdiff_patch<'a>(
             Binary::Keep => {
                 // GitHeader::parse consumed the marker line but not the payload.
                 // Use the recorded offset to pass input from the marker onward.
-                let binary_input = &ps.input[abs_patch_start + binary_patch_start..];
+                let (_, binary_input) = ps.input.split_at(abs_patch_start + binary_patch_start);
+                // Binary patch data is always ASCII (base85-encoded).
+                let binary_input = binary_input.as_str_prefix();
                 let (binary_patch, consumed) = match parse_binary_patch(binary_input) {
                     Ok(result) => result,
                     Err(e) => return Some(Err(e.into())),
@@ -239,9 +243,9 @@ fn next_gitdiff_patch<'a>(
     Some(Ok(FilePatch::new(operation, patch, old_mode, new_mode)))
 }
 
-fn next_unidiff_patch<'a>(
-    ps: &mut PatchSet<'a>,
-) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
+fn next_unidiff_patch<'a, T: Text + ?Sized>(
+    ps: &mut PatchSet<'a, T>,
+) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {
     let remaining = remaining(ps);
     if remaining.is_empty() {
         return None;
@@ -250,7 +254,7 @@ fn next_unidiff_patch<'a>(
     let patch_start = find_patch_start(remaining)?;
     ps.found_any = true;
 
-    let patch_input = &remaining[patch_start..];
+    let (_, patch_input) = remaining.split_at(patch_start);
 
     let opts = crate::patch::parse::ParseOpts::default();
     let (result, consumed) = parse_one(patch_input, opts);
@@ -454,9 +458,9 @@ impl<'a> GitHeader<'a> {
 }
 
 /// Determines the file operation from git headers and patch paths.
-fn extract_file_op_gitdiff<'a>(
+fn extract_file_op_gitdiff<'a, T: Text + ?Sized>(
     header: &GitHeader<'a>,
-    patch: &Patch<'a, str>,
+    patch: &Patch<'a, T>,
 ) -> Result<FileOperation<'a>, PatchSetParseError> {
     // Git headers are authoritative for rename/copy
     if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) {
@@ -709,12 +713,29 @@ fn longest_common_path_suffix<'a>(a: &'a str, b: &'a str) -> Option<&'a str> {
 }
 
 /// Extracts the file operation from a patch based on its header paths.
-pub(crate) fn extract_file_op_unidiff<'a>(
-    original: Option<&Cow<'a, str>>,
-    modified: Option<&Cow<'a, str>>,
+///
+/// Converts paths from `Cow<'a, T>` to `Cow<'a, str>` via [`Text::as_str`],
+/// since [`FileOperation`] always uses `Cow<str>` for paths.
+fn extract_file_op_unidiff<'a, T: Text + ?Sized>(
+    original: Option<&Cow<'a, T>>,
+    modified: Option<&Cow<'a, T>>,
 ) -> Result<FileOperation<'a>, PatchSetParseError> {
-    let is_create = original.map(Cow::as_ref) == Some(DEV_NULL);
-    let is_delete = modified.map(Cow::as_ref) == Some(DEV_NULL);
+    let to_str = |cow: &Cow<'a, T>| -> Option<Cow<'a, str>> {
+        match cow {
+            Cow::Borrowed(b) => b.as_str().map(Cow::Borrowed),
+            Cow::Owned(_) => cow.as_ref().as_str().map(|s| Cow::Owned(s.to_owned())),
+        }
+    };
+    // TODO: support non-UTF8 paths
+    let original = original
+        .map(|cow| to_str(cow).ok_or(PatchSetParseErrorKind::InvalidUtf8Path))
+        .transpose()?;
+    let modified = modified
+        .map(|cow| to_str(cow).ok_or(PatchSetParseErrorKind::InvalidUtf8Path))
+        .transpose()?;
+
+    let is_create = original.as_deref() == Some(DEV_NULL);
+    let is_delete = modified.as_deref() == Some(DEV_NULL);
 
     if is_create && is_delete {
         return Err(PatchSetParseErrorKind::BothDevNull.into());
@@ -722,29 +743,26 @@ pub(crate) fn extract_file_op_unidiff<'a>(
 
     if is_delete {
         let path = original.ok_or(PatchSetParseErrorKind::DeleteMissingOriginalPath)?;
-        Ok(FileOperation::Delete(path.clone()))
+        Ok(FileOperation::Delete(path))
     } else if is_create {
         let path = modified.ok_or(PatchSetParseErrorKind::CreateMissingModifiedPath)?;
-        Ok(FileOperation::Create(path.clone()))
+        Ok(FileOperation::Create(path))
     } else {
         match (original, modified) {
-            (Some(original), Some(modified)) => Ok(FileOperation::Modify {
-                original: original.clone(),
-                modified: modified.clone(),
-            }),
+            (Some(original), Some(modified)) => Ok(FileOperation::Modify { original, modified }),
             (None, Some(modified)) => {
                 // No original path, but has modified path.
                 // Observed that GNU patch reads from the modified path in this case.
                 Ok(FileOperation::Modify {
                     original: modified.clone(),
-                    modified: modified.clone(),
+                    modified,
                 })
             }
             (Some(original), None) => {
                 // No modified path, but has original path.
                 Ok(FileOperation::Modify {
                     modified: original.clone(),
-                    original: original.clone(),
+                    original,
                 })
             }
             (None, None) => Err(PatchSetParseErrorKind::NoFilePath.into()),
@@ -762,7 +780,7 @@ fn strip_line_ending<T: Text + ?Sized>(line: &T) -> &T {
     line.strip_suffix("\n").unwrap_or(line)
 }
 
-fn remaining<'a>(ps: &PatchSet<'a>) -> &'a str {
+fn remaining<'a, T: Text + ?Sized>(ps: &PatchSet<'a, T>) -> &'a T {
     let (_, rest) = ps.input.split_at(ps.offset);
     rest
 }

From c189ab8008fee62f99caf113c1779837ab1bd945 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Tue, 14 Apr 2026 00:12:01 -0400
Subject: [PATCH 22/26] feat(patch_set): `PatchSet::parse_bytes` for raw byte
 input

---
 src/patch_set/parse.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
index c021f02..08502d4 100644
--- a/src/patch_set/parse.rs
+++ b/src/patch_set/parse.rs
@@ -79,6 +79,31 @@ impl<'a> Iterator for PatchSet<'a, str> {
     }
 }
 
+impl<'a> PatchSet<'a, [u8]> {
+    /// Creates a streaming parser for multiple file patches from raw bytes.
+    ///
+    /// Unlike [`PatchSet::parse`], this preserves non-UTF-8 content in hunks
+    /// (e.g., binary files that git misdetects as text).
+    pub fn parse_bytes(input: &'a [u8], opts: ParseOptions) -> Self {
+        let input = strip_email_preamble(input);
+        Self {
+            input,
+            offset: 0,
+            opts,
+            finished: false,
+            found_any: false,
+        }
+    }
+}
+
+impl<'a> Iterator for PatchSet<'a, [u8]> {
+    type Item = Result<FilePatch<'a, [u8]>, PatchSetParseError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        next_patch(self)
+    }
+}
+
 fn next_patch<'a, T: Text + ?Sized>(
     ps: &mut PatchSet<'a, T>,
 ) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {

From 035b7d6f7b88d22e224e1df8c6a12bb5a22f6a51 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Tue, 14 Apr 2026 01:21:13 -0400
Subject: [PATCH 23/26] test(patch_set): add unit tests for `parse_bytes`

---
 src/patch_set/tests.rs | 50 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs
index 0944c54..5a9951e 100644
--- a/src/patch_set/tests.rs
+++ b/src/patch_set/tests.rs
@@ -606,3 +606,53 @@ diff --git a/foo b/foo
         assert_eq!(patches.len(), 1, "binary should be skipped, text parsed");
     }
 }
+
+mod patchset_parse_bytes {
+    use super::*;
+
+    #[test]
+    fn non_utf8_hunk_content() {
+        // Hunk lines contain invalid UTF-8 bytes (0x80, 0xff).
+        let mut input: Vec<u8> = Vec::new();
+        input.extend(b"--- a/file.bin\n");
+        input.extend(b"+++ b/file.bin\n");
+        input.extend(b"@@ -1,3 +1,3 @@\n");
+        input.extend(b" valid ascii\n");
+        input.extend(b"-old \x80\xff bytes\n");
+        input.extend(b"+new \x80\xff bytes\n");
+        input.extend(b" more ascii\n");
+
+        let patches = PatchSet::parse_bytes(&input, ParseOptions::unidiff())
+            .collect::<Result<Vec<_>, _>>()
+            .unwrap();
+        assert_eq!(patches.len(), 1);
+
+        let patch = patches[0].patch().as_text().expect("expected text patch");
+        let hunks = patch.hunks();
+        assert_eq!(hunks.len(), 1);
+
+        use crate::patch::Line;
+        let lines = hunks[0].lines();
+        assert_eq!(lines.len(), 4);
+        assert_eq!(lines[0], Line::Context(b"valid ascii\n" as &[u8]));
+        assert_eq!(lines[1], Line::Delete(b"old \x80\xff bytes\n" as &[u8]));
+        assert_eq!(lines[2], Line::Insert(b"new \x80\xff bytes\n" as &[u8]));
+        assert_eq!(lines[3], Line::Context(b"more ascii\n" as &[u8]));
+    }
+
+    #[test]
+    fn non_utf8_path_returns_error() {
+        // File paths (currently) must be valid UTF-8 even in byte mode.
+        let mut input: Vec<u8> = Vec::new();
+        input.extend(b"--- a/bad\xffpath\n");
+        input.extend(b"+++ b/good\n");
+        input.extend(b"@@ -1 +1 @@\n");
+        input.extend(b"-old\n");
+        input.extend(b"+new\n");
+
+        let result: Result<Vec<_>, _> =
+            PatchSet::parse_bytes(&input, ParseOptions::unidiff()).collect();
+        let err = result.unwrap_err();
+        assert_eq!(err.kind, PatchSetParseErrorKind::InvalidUtf8Path);
+    }
+}

From 095fa47d6856f242e254ee4d346d7ea00027b8ff Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Tue, 14 Apr 2026 01:21:14 -0400
Subject: [PATCH 24/26] test(replay): switch to `PatchSet::parse_bytes~

Avoid lossy UTF-8 conversion of diff output
so non-UTF8 content round-trips correctly.

No more skips in GitDiff mode (except submodule of course)
---
 tests/replay.rs | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/tests/replay.rs b/tests/replay.rs
index 8a50065..cbf07ac 100644
--- a/tests/replay.rs
+++ b/tests/replay.rs
@@ -152,11 +152,6 @@ impl CatFile {
 
         Some(buf)
     }
-
-    /// Like [`CatFile::get`] but returns only UTF-8 string.
-    fn get_text(&mut self, rev: &str, path: &str) -> Option<String> {
-        self.get(rev, path).and_then(|b| String::from_utf8(b).ok())
-    }
 }
 
 /// Local enum for test configuration (maps to ParseOptions).
@@ -237,7 +232,7 @@ fn test_mode() -> TestMode {
     }
 }
 
-fn git(repo: &Path, args: &[&str]) -> String {
+fn git_bytes(repo: &Path, args: &[&str]) -> Vec<u8> {
     let mut cmd = Command::new("git");
     cmd.env("GIT_CONFIG_NOSYSTEM", "1");
     cmd.env("GIT_CONFIG_GLOBAL", "/dev/null");
@@ -251,7 +246,11 @@ fn git(repo: &Path, args: &[&str]) -> String {
         panic!("git {args:?} failed: {stderr}");
     }
 
-    String::from_utf8_lossy(&output.stdout).into_owned()
+    output.stdout
+}
+
+fn git(repo: &Path, args: &[&str]) -> String {
+    String::from_utf8_lossy(&git_bytes(repo, args)).into_owned()
 }
 
 /// Get the list of commits from oldest to newest.
@@ -330,8 +329,8 @@ fn process_commit(
     // Use `--no-renames` to represent them as delete + create instead.
     // GitDiff mode uses `--binary` to get actual binary patch data.
     let diff_output = match mode {
-        TestMode::UniDiff => git(repo, &["diff", "--no-renames", parent, child]),
-        TestMode::GitDiff => git(repo, &["diff", "--binary", parent, child]),
+        TestMode::UniDiff => git_bytes(repo, &["diff", "--no-renames", parent, child]),
+        TestMode::GitDiff => git_bytes(repo, &["diff", "--binary", parent, child]),
     };
 
     if diff_output.is_empty() {
@@ -404,9 +403,10 @@ fn process_commit(
         };
     }
 
-    let patchset: Vec<_> = match PatchSet::parse(&diff_output, mode.into()).collect() {
+    let patchset: Vec<_> = match PatchSet::parse_bytes(&diff_output, mode.into()).collect() {
         Ok(ps) => ps,
         Err(e) => {
+            let diff_output = String::from_utf8_lossy(&diff_output);
             panic!(
                 "Failed to parse patch for {parent_short}..{child_short}: {e}\n\n\
                 Diff:\n{diff_output}"
@@ -418,6 +418,7 @@ fn process_commit(
     // This catches both missing and spurious patches.
     if patchset.len() != expected_file_count {
         let n = patchset.len();
+        let diff_output = String::from_utf8_lossy(&diff_output);
         panic!(
             "Patch count mismatch for {parent_short}..{child_short}: \
              expected {expected_file_count} files, parsed {n} patches\n\n\
@@ -461,42 +462,43 @@ fn process_commit(
         match file_patch.patch() {
             PatchKind::Text(patch) => {
                 let base_content = if let Some(path) = base_path {
-                    let Some(content) = cat.get_text(parent, path) else {
+                    let Some(content) = cat.get(parent, path) else {
                         skipped += 1;
                         continue;
                     };
                     content
                 } else {
-                    String::new()
+                    Vec::new()
                 };
 
                 let expected_content = if let Some(path) = target_path {
-                    let Some(content) = cat.get_text(child, path) else {
+                    let Some(content) = cat.get(child, path) else {
                         skipped += 1;
                         continue;
                     };
                     content
                 } else {
-                    String::new()
+                    Vec::new()
                 };
 
-                let result = match diffy::apply(&base_content, patch) {
+                let result = match diffy::apply_bytes(&base_content, patch) {
                     Ok(r) => r,
                     Err(e) => {
+                        let base_content = String::from_utf8_lossy(&base_content);
                         panic!(
                             "Failed to apply patch at {parent_short}..{child_short} for {desc}: {e}\n\n\
-                            Patch:\n{patch}\n\n\
                             Base content:\n{base_content}"
                         );
                     }
                 };
 
                 if result != expected_content {
+                    let expected_content = String::from_utf8_lossy(&expected_content);
+                    let result = String::from_utf8_lossy(&result);
                     panic!(
                         "Content mismatch at {parent_short}..{child_short} for {desc}\n\n\
                         --- Expected ---\n{expected_content}\n\n\
-                        --- Got ---\n{result}\n\n\
-                        --- Patch ---\n{patch}"
+                        --- Got ---\n{result}"
                     );
                 }
             }

From 2edeadb8fae4d8a34797a53ae50c7ba1045c7d14 Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Tue, 14 Apr 2026 01:21:15 -0400
Subject: [PATCH 25/26] test(compat): switch to byte-aware API

---
 tests/compat/common.rs | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/tests/compat/common.rs b/tests/compat/common.rs
index 8e5fdda..83f27ad 100644
--- a/tests/compat/common.rs
+++ b/tests/compat/common.rs
@@ -89,7 +89,7 @@ impl<'a> Case<'a> {
         let case_dir = self.case_dir();
         let in_dir = case_dir.join("in");
         let patch_path = in_dir.join("foo.patch");
-        let patch = fs::read_to_string(&patch_path)
+        let patch = fs::read(&patch_path)
             .unwrap_or_else(|e| panic!("failed to read {}: {e}", patch_path.display()));
 
         let case_name = self.case_name;
@@ -167,7 +167,7 @@ impl<'a> Case<'a> {
 
 fn git_apply(
     output_dir: &Path,
-    patch: &str,
+    patch: &[u8],
     strip_level: u32,
     in_dir: &Path,
 ) -> Result<(), String> {
@@ -183,12 +183,7 @@ fn git_apply(
     cmd.stderr(Stdio::piped());
 
     let mut child = cmd.spawn().expect("failed to spawn git apply");
-    child
-        .stdin
-        .as_mut()
-        .unwrap()
-        .write_all(patch.as_bytes())
-        .unwrap();
+    child.stdin.as_mut().unwrap().write_all(patch).unwrap();
 
     let output = child.wait_with_output().unwrap();
     if output.status.success() {
@@ -331,12 +326,12 @@ fn copy_input_files_impl(src: &Path, dst: &Path, base: &Path, skip_extensions: &
 /// Apply patch using diffy to output directory.
 pub fn apply_diffy(
     in_dir: &Path,
-    patch: &str,
+    patch: &[u8],
     output_dir: &Path,
     opts: ParseOptions,
     strip_prefix: u32,
 ) -> Result<(), TestError> {
-    let patches: Vec<_> = PatchSet::parse(patch, opts)
+    let patches: Vec<_> = PatchSet::parse_bytes(patch, opts)
         .collect::<Result<_, _>>()
         .map_err(TestError::Parse)?;
 
@@ -373,11 +368,11 @@ pub fn apply_diffy(
 
         match file_patch.patch() {
             PatchKind::Text(patch) => {
-                let original = String::from_utf8(read_original()).unwrap();
+                let original = read_original();
 
-                let result = diffy::apply(&original, patch).map_err(TestError::Apply)?;
+                let result = diffy::apply_bytes(&original, patch).map_err(TestError::Apply)?;
 
-                write_modified(result.as_bytes());
+                write_modified(&result);
             }
             PatchKind::Binary(BinaryPatch::Marker) => {
                 // Dont do anything if it is just a binary patch marker.

From 05cdc37fd97cebb9d4c6ee079ab78ba6c9d844cd Mon Sep 17 00:00:00 2001
From: Weihang Lo <me@weihanglo.tw>
Date: Tue, 14 Apr 2026 01:21:16 -0400
Subject: [PATCH 26/26] test(compat): add non-UTF8 hunk tests

---
 tests/compat/git/mod.rs                                   | 7 +++++++
 tests/compat/git/non_utf8_hunk_content/in/file.bin        | 1 +
 tests/compat/git/non_utf8_hunk_content/in/foo.patch       | 6 ++++++
 tests/compat/git/non_utf8_hunk_content/out/file.bin       | 1 +
 tests/compat/gnu_patch/mod.rs                             | 7 +++++++
 tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin  | 1 +
 tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch | 5 +++++
 tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin | 1 +
 8 files changed, 29 insertions(+)
 create mode 100644 tests/compat/git/non_utf8_hunk_content/in/file.bin
 create mode 100644 tests/compat/git/non_utf8_hunk_content/in/foo.patch
 create mode 100644 tests/compat/git/non_utf8_hunk_content/out/file.bin
 create mode 100644 tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin
 create mode 100644 tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch
 create mode 100644 tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin

diff --git a/tests/compat/git/mod.rs b/tests/compat/git/mod.rs
index 25df395..335a63c 100644
--- a/tests/compat/git/mod.rs
+++ b/tests/compat/git/mod.rs
@@ -194,6 +194,13 @@ fn binary_mixed_delta_literal() {
     Case::git("binary_mixed_delta_literal").strip(1).run();
 }
 
+// Patch with non-UTF-8 bytes (0x80, 0xff) in hunk content.
+// Both git apply and diffy handle raw bytes correctly.
+#[test]
+fn non_utf8_hunk_content() {
+    Case::git("non_utf8_hunk_content").strip(1).run();
+}
+
 // Multi-file patch with junk/preamble text between different files.
 //
 // git apply behavior: Ignores content between `diff --git` boundaries.
diff --git a/tests/compat/git/non_utf8_hunk_content/in/file.bin b/tests/compat/git/non_utf8_hunk_content/in/file.bin
new file mode 100644
index 0000000..5ddb01f
--- /dev/null
+++ b/tests/compat/git/non_utf8_hunk_content/in/file.bin
@@ -0,0 +1 @@
+hello �� world
diff --git a/tests/compat/git/non_utf8_hunk_content/in/foo.patch b/tests/compat/git/non_utf8_hunk_content/in/foo.patch
new file mode 100644
index 0000000..0c3f72f
--- /dev/null
+++ b/tests/compat/git/non_utf8_hunk_content/in/foo.patch
@@ -0,0 +1,6 @@
+diff --git a/file.bin b/file.bin
+--- a/file.bin
++++ b/file.bin
+@@ -1 +1 @@
+-hello �� world
++hello �� universe
diff --git a/tests/compat/git/non_utf8_hunk_content/out/file.bin b/tests/compat/git/non_utf8_hunk_content/out/file.bin
new file mode 100644
index 0000000..715e61f
--- /dev/null
+++ b/tests/compat/git/non_utf8_hunk_content/out/file.bin
@@ -0,0 +1 @@
+hello �� universe
diff --git a/tests/compat/gnu_patch/mod.rs b/tests/compat/gnu_patch/mod.rs
index fe59fcb..643d16e 100644
--- a/tests/compat/gnu_patch/mod.rs
+++ b/tests/compat/gnu_patch/mod.rs
@@ -151,6 +151,13 @@ fn junk_between_hunks() {
     Case::gnu_patch("junk_between_hunks").run();
 }
 
+// Patch with non-UTF-8 bytes (0x80, 0xff) in hunk content.
+// Both GNU patch and diffy handle raw bytes correctly.
+#[test]
+fn non_utf8_hunk_content() {
+    Case::gnu_patch("non_utf8_hunk_content").run();
+}
+
 // Failure cases
 
 #[test]
diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin b/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin
new file mode 100644
index 0000000..5ddb01f
--- /dev/null
+++ b/tests/compat/gnu_patch/non_utf8_hunk_content/in/file.bin
@@ -0,0 +1 @@
+hello �� world
diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch b/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch
new file mode 100644
index 0000000..10e9997
--- /dev/null
+++ b/tests/compat/gnu_patch/non_utf8_hunk_content/in/foo.patch
@@ -0,0 +1,5 @@
+--- file.bin
++++ file.bin
+@@ -1 +1 @@
+-hello �� world
++hello �� universe
diff --git a/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin b/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin
new file mode 100644
index 0000000..715e61f
--- /dev/null
+++ b/tests/compat/gnu_patch/non_utf8_hunk_content/out/file.bin
@@ -0,0 +1 @@
+hello �� universe