Skip to content

Commit 7b7c81c

Browse files
committed
feat(patch): strict parsing mode (the git-apply behavior)
Adds `from_str_strict`/`from_bytes_strict`, which reject orphaned hunk headers hidden behind trailing content. This matches `git apply` behavior. Plain trailing junk is still accepted. The default `from_str`/`from_bytes` remain permissive (the GNU patch behavior).
1 parent 30352b0 commit 7b7c81c

File tree

4 files changed

+93
-12
lines changed

4 files changed

+93
-12
lines changed

src/patch/error.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ pub(crate) enum ParsePatchErrorKind {
107107

108108
/// Missing newline at end of line.
109109
MissingNewline,
110+
111+
/// Orphaned hunk header found after trailing content.
112+
OrphanedHunkHeader,
110113
}
111114

112115
impl fmt::Display for ParsePatchErrorKind {
@@ -132,6 +135,7 @@ impl fmt::Display for ParsePatchErrorKind {
132135
Self::UnexpectedNoNewlineMarker => "unexpected 'No newline at end of file' line",
133136
Self::UnexpectedHunkLine => "unexpected line in hunk body",
134137
Self::MissingNewline => "missing newline",
138+
Self::OrphanedHunkHeader => "orphaned hunk header after trailing content",
135139
};
136140
write!(f, "{msg}")
137141
}

src/patch/mod.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,30 @@ use crate::utils::{byte_needs_quoting, fmt_escaped_byte, write_escaped_byte};
1616
const NO_NEWLINE_AT_EOF: &str = "\\ No newline at end of file";
1717

1818
/// Representation of all the differences between two files
19+
///
20+
/// # Parsing modes
21+
///
22+
/// `Patch` provides two parsing modes with different strictness levels,
23+
/// modeled after the behavior of GNU patch and `git apply`:
24+
///
25+
/// | Scenario | GNU patch | git apply | [`from_str`] | [`from_str_strict`] |
26+
/// |-----------------------------------|-------------|-----------|--------------|---------------------|
27+
/// | Junk after all hunks are complete | Ignores | Ignores | Ignores | Ignores |
28+
/// | Junk between hunks | Ignores[^1] | Errors | Ignores[^1] | Errors |
29+
///
30+
/// [^1]: "Ignores" here means silently stopping at the junk.
31+
/// Only hunks before it are parsed; later hunks are dropped.
32+
///
33+
/// [`from_str`] and [`from_bytes`] follow GNU patch behavior,
34+
/// silently ignoring non-patch content after a hunk's line counts are satisfied.
35+
///
36+
/// [`from_str_strict`] and [`from_bytes_strict`] follow `git apply` behavior,
37+
/// additionally rejecting orphaned hunk headers hidden behind trailing content.
38+
///
39+
/// [`from_str`]: Patch::from_str
40+
/// [`from_bytes`]: Patch::from_bytes
41+
/// [`from_str_strict`]: Patch::from_str_strict
42+
/// [`from_bytes_strict`]: Patch::from_bytes_strict
1943
#[derive(PartialEq, Eq)]
2044
pub struct Patch<'a, T: ToOwned + ?Sized> {
2145
// TODO GNU patch is able to parse patches without filename headers.
@@ -108,13 +132,31 @@ impl<'a> Patch<'a, str> {
108132
pub fn from_str(s: &'a str) -> Result<Patch<'a, str>, ParsePatchError> {
109133
parse::parse(s)
110134
}
135+
136+
/// Parse a `Patch` from a string in strict mode
137+
///
138+
/// Unlike [`Patch::from_str`],
139+
/// this rejects orphaned hunk headers hidden behind trailing content,
140+
/// matching `git apply` behavior.
141+
pub fn from_str_strict(s: &'a str) -> Result<Patch<'a, str>, ParsePatchError> {
142+
parse::parse_strict(s)
143+
}
111144
}
112145

113146
impl<'a> Patch<'a, [u8]> {
114147
/// Parse a `Patch` from bytes
115148
pub fn from_bytes(s: &'a [u8]) -> Result<Patch<'a, [u8]>, ParsePatchError> {
116149
parse::parse_bytes(s)
117150
}
151+
152+
/// Parse a `Patch` from bytes in strict mode
153+
///
154+
/// Unlike [`Patch::from_bytes`],
155+
/// this rejects orphaned hunk headers hidden behind trailing content,
156+
/// matching `git apply` behavior.
157+
pub fn from_bytes_strict(s: &'a [u8]) -> Result<Patch<'a, [u8]>, ParsePatchError> {
158+
parse::parse_bytes_strict(s)
159+
}
118160
}
119161

120162
impl<T: ToOwned + ?Sized> Clone for Patch<'_, T> {

src/patch/parse.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,19 @@ pub fn parse(input: &str) -> Result<Patch<'_, str>> {
6565
))
6666
}
6767

68+
pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
69+
let mut parser = Parser::new(input);
70+
let header = patch_header(&mut parser)?;
71+
let hunks = hunks(&mut parser)?;
72+
reject_orphaned_hunk_headers(&mut parser)?;
73+
74+
Ok(Patch::new(
75+
header.0.map(convert_cow_to_str),
76+
header.1.map(convert_cow_to_str),
77+
hunks,
78+
))
79+
}
80+
6881
pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
6982
let mut parser = Parser::new(input);
7083
let header = patch_header(&mut parser)?;
@@ -73,6 +86,15 @@ pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
7386
Ok(Patch::new(header.0, header.1, hunks))
7487
}
7588

89+
pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
90+
let mut parser = Parser::new(input);
91+
let header = patch_header(&mut parser)?;
92+
let hunks = hunks(&mut parser)?;
93+
reject_orphaned_hunk_headers(&mut parser)?;
94+
95+
Ok(Patch::new(header.0, header.1, hunks))
96+
}
97+
7698
// This is only used when the type originated as a utf8 string
7799
fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
78100
match cow {
@@ -154,6 +176,20 @@ fn verify_hunks_in_order<T: ?Sized>(hunks: &[Hunk<'_, T>]) -> bool {
154176
true
155177
}
156178

179+
/// Scans remaining lines for orphaned `@@ ` hunk headers.
180+
///
181+
/// In strict mode (git-apply behavior), trailing junk is allowed but
182+
/// an `@@ ` line hiding behind that junk indicates a lost hunk.
183+
fn reject_orphaned_hunk_headers<T: Text + ?Sized>(parser: &mut Parser<'_, T>) -> Result<()> {
184+
while let Some(line) = parser.peek() {
185+
if line.starts_with("@@ ") {
186+
return Err(parser.error(ParsePatchErrorKind::OrphanedHunkHeader));
187+
}
188+
parser.next()?;
189+
}
190+
Ok(())
191+
}
192+
157193
fn hunks<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result<Vec<Hunk<'a, T>>> {
158194
let mut hunks = Vec::new();
159195
// Following GNU patch behavior: stop at non-@@ content.

src/patch/tests.rs

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::error::ParsePatchErrorKind;
2-
use super::parse::{parse, parse_bytes};
2+
use super::parse::{parse, parse_bytes, parse_bytes_strict, parse_strict};
33

44
#[test]
55
fn trailing_garbage_after_complete_hunk() {
@@ -141,8 +141,6 @@ trailing garbage
141141

142142
// Strict mode (git-apply behavior): rejects orphaned hunk headers
143143
// hidden behind trailing content, but allows plain trailing junk.
144-
// Currently all use permissive `parse`; the next commit introduces
145-
// `parse_strict` and updates assertions where behavior diverges.
146144
mod strict_mode {
147145
use super::*;
148146

@@ -157,7 +155,7 @@ mod strict_mode {
157155
+new
158156
this is trailing garbage
159157
";
160-
let patch = parse(s).unwrap();
158+
let patch = parse_strict(s).unwrap();
161159
assert_eq!(patch.hunks().len(), 1);
162160
}
163161

@@ -171,15 +169,14 @@ this is trailing garbage
171169
+new
172170
this is trailing garbage
173171
";
174-
let patch = parse_bytes(&s[..]).unwrap();
172+
let patch = parse_bytes_strict(&s[..]).unwrap();
175173
assert_eq!(patch.hunks().len(), 1);
176174
}
177175

178176
#[test]
179177
fn orphaned_hunk_header_after_junk() {
180-
// Junk between hunks hides the second @@ — strict should reject this
178+
// Junk between hunks hides the second @@ — strict rejects this
181179
// since git apply errors with "patch fragment without header".
182-
// Currently permissive: succeeds with 1 hunk.
183180
let s = "\
184181
--- a/file.txt
185182
+++ b/file.txt
@@ -191,8 +188,10 @@ not a hunk line
191188
-b
192189
+B
193190
";
194-
let patch = parse(s).unwrap();
195-
assert_eq!(patch.hunks().len(), 1);
191+
assert_eq!(
192+
parse_strict(s).unwrap_err().kind,
193+
ParsePatchErrorKind::OrphanedHunkHeader,
194+
);
196195
}
197196

198197
#[test]
@@ -204,7 +203,7 @@ not a hunk line
204203
-old
205204
+new
206205
";
207-
let patch = parse(s).unwrap();
206+
let patch = parse_strict(s).unwrap();
208207
assert_eq!(patch.hunks().len(), 1);
209208
}
210209

@@ -220,7 +219,7 @@ not a hunk line
220219
-b
221220
+B
222221
";
223-
let patch = parse(s).unwrap();
222+
let patch = parse_strict(s).unwrap();
224223
assert_eq!(patch.hunks().len(), 2);
225224
}
226225

@@ -236,7 +235,7 @@ garbage before hunk complete
236235
line 3
237236
";
238237
assert_eq!(
239-
parse(s).unwrap_err().kind,
238+
parse_strict(s).unwrap_err().kind,
240239
ParsePatchErrorKind::UnexpectedHunkLine,
241240
);
242241
}

0 commit comments

Comments
 (0)